init commit of samurai
This commit is contained in:
284
sam2/demo/frontend/src/common/codecs/VideoDecoder.ts
Normal file
284
sam2/demo/frontend/src/common/codecs/VideoDecoder.ts
Normal file
@@ -0,0 +1,284 @@
|
||||
/**
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import {cloneFrame} from '@/common/codecs/WebCodecUtils';
|
||||
import {FileStream} from '@/common/utils/FileUtils';
|
||||
import {
|
||||
createFile,
|
||||
DataStream,
|
||||
MP4ArrayBuffer,
|
||||
MP4File,
|
||||
MP4Sample,
|
||||
MP4VideoTrack,
|
||||
} from 'mp4box';
|
||||
import {isAndroid, isChrome, isEdge, isWindows} from 'react-device-detect';
|
||||
|
||||
export type ImageFrame = {
|
||||
bitmap: VideoFrame;
|
||||
timestamp: number;
|
||||
duration: number;
|
||||
};
|
||||
|
||||
export type DecodedVideo = {
|
||||
width: number;
|
||||
height: number;
|
||||
frames: ImageFrame[];
|
||||
numFrames: number;
|
||||
fps: number;
|
||||
};
|
||||
|
||||
function decodeInternal(
|
||||
identifier: string,
|
||||
onReady: (mp4File: MP4File) => Promise<void>,
|
||||
onProgress: (decodedVideo: DecodedVideo) => void,
|
||||
): Promise<DecodedVideo> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const imageFrames: ImageFrame[] = [];
|
||||
const globalSamples: MP4Sample[] = [];
|
||||
|
||||
let decoder: VideoDecoder;
|
||||
|
||||
let track: MP4VideoTrack | null = null;
|
||||
const mp4File = createFile();
|
||||
|
||||
mp4File.onError = reject;
|
||||
mp4File.onReady = async info => {
|
||||
if (info.videoTracks.length > 0) {
|
||||
track = info.videoTracks[0];
|
||||
} else {
|
||||
// The video does not have a video track, so looking if there is an
|
||||
// "otherTracks" available. Note, I couldn't find any documentation
|
||||
// about "otherTracks" in WebCodecs [1], but it was available in the
|
||||
// info for MP4V-ES, which isn't supported by Chrome [2].
|
||||
// However, we'll still try to get the track and then throw an error
|
||||
// further down in the VideoDecoder.isConfigSupported if the codec is
|
||||
// not supported by the browser.
|
||||
//
|
||||
// [1] https://www.w3.org/TR/webcodecs/
|
||||
// [2] https://developer.mozilla.org/en-US/docs/Web/Media/Formats/Video_codecs#mp4v-es
|
||||
track = info.otherTracks[0];
|
||||
}
|
||||
|
||||
if (track == null) {
|
||||
reject(new Error(`${identifier} does not contain a video track`));
|
||||
return;
|
||||
}
|
||||
|
||||
const timescale = track.timescale;
|
||||
const edits = track.edits;
|
||||
|
||||
let frame_n = 0;
|
||||
decoder = new VideoDecoder({
|
||||
// Be careful with any await in this function. The VideoDecoder will
|
||||
// not await output and continue calling it with decoded frames.
|
||||
async output(inputFrame) {
|
||||
if (track == null) {
|
||||
reject(new Error(`${identifier} does not contain a video track`));
|
||||
return;
|
||||
}
|
||||
|
||||
const saveTrack = track;
|
||||
|
||||
// If the track has edits, we'll need to check that only frames are
|
||||
// returned that are within the edit list. This can happen for
|
||||
// trimmed videos that have not been transcoded and therefore the
|
||||
// video track contains more frames than those visually rendered when
|
||||
// playing back the video.
|
||||
if (edits != null && edits.length > 0) {
|
||||
const cts = Math.round(
|
||||
(inputFrame.timestamp * timescale) / 1_000_000,
|
||||
);
|
||||
if (cts < edits[0].media_time) {
|
||||
inputFrame.close();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Workaround for Chrome where the decoding stops at ~17 frames unless
|
||||
// the VideoFrame is closed. So, the workaround here is to create a
|
||||
// new VideoFrame and close the decoded VideoFrame.
|
||||
// The frame has to be cloned, or otherwise some frames at the end of the
|
||||
// video will be black. Note, the default VideoFrame.clone doesn't work
|
||||
// and it is using a frame cloning found here:
|
||||
// https://webcodecs-blogpost-demo.glitch.me/
|
||||
if (
|
||||
(isAndroid && isChrome) ||
|
||||
(isWindows && isChrome) ||
|
||||
(isWindows && isEdge)
|
||||
) {
|
||||
const clonedFrame = await cloneFrame(inputFrame);
|
||||
inputFrame.close();
|
||||
inputFrame = clonedFrame;
|
||||
}
|
||||
|
||||
const sample = globalSamples[frame_n];
|
||||
if (sample != null) {
|
||||
const duration = (sample.duration * 1_000_000) / sample.timescale;
|
||||
imageFrames.push({
|
||||
bitmap: inputFrame,
|
||||
timestamp: inputFrame.timestamp,
|
||||
duration,
|
||||
});
|
||||
// Sort frames in order of timestamp. This is needed because Safari
|
||||
// can return decoded frames out of order.
|
||||
imageFrames.sort((a, b) => (a.timestamp < b.timestamp ? 1 : -1));
|
||||
// Update progress on first frame and then every 40th frame
|
||||
if (onProgress != null && frame_n % 100 === 0) {
|
||||
onProgress({
|
||||
width: saveTrack.track_width,
|
||||
height: saveTrack.track_height,
|
||||
frames: imageFrames,
|
||||
numFrames: saveTrack.nb_samples,
|
||||
fps:
|
||||
(saveTrack.nb_samples / saveTrack.duration) *
|
||||
saveTrack.timescale,
|
||||
});
|
||||
}
|
||||
}
|
||||
frame_n++;
|
||||
|
||||
if (saveTrack.nb_samples === frame_n) {
|
||||
// Sort frames in order of timestamp. This is needed because Safari
|
||||
// can return decoded frames out of order.
|
||||
imageFrames.sort((a, b) => (a.timestamp > b.timestamp ? 1 : -1));
|
||||
resolve({
|
||||
width: saveTrack.track_width,
|
||||
height: saveTrack.track_height,
|
||||
frames: imageFrames,
|
||||
numFrames: saveTrack.nb_samples,
|
||||
fps:
|
||||
(saveTrack.nb_samples / saveTrack.duration) *
|
||||
saveTrack.timescale,
|
||||
});
|
||||
}
|
||||
},
|
||||
error(error) {
|
||||
reject(error);
|
||||
},
|
||||
});
|
||||
|
||||
let description;
|
||||
const trak = mp4File.getTrackById(track.id);
|
||||
const entries = trak?.mdia?.minf?.stbl?.stsd?.entries;
|
||||
if (entries == null) {
|
||||
return;
|
||||
}
|
||||
for (const entry of entries) {
|
||||
if (entry.avcC || entry.hvcC) {
|
||||
const stream = new DataStream(undefined, 0, DataStream.BIG_ENDIAN);
|
||||
if (entry.avcC) {
|
||||
entry.avcC.write(stream);
|
||||
} else if (entry.hvcC) {
|
||||
entry.hvcC.write(stream);
|
||||
}
|
||||
description = new Uint8Array(stream.buffer, 8); // Remove the box header.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const configuration: VideoDecoderConfig = {
|
||||
codec: track.codec,
|
||||
codedWidth: track.track_width,
|
||||
codedHeight: track.track_height,
|
||||
description,
|
||||
};
|
||||
const supportedConfig =
|
||||
await VideoDecoder.isConfigSupported(configuration);
|
||||
if (supportedConfig.supported == true) {
|
||||
decoder.configure(configuration);
|
||||
|
||||
mp4File.setExtractionOptions(track.id, null, {
|
||||
nbSamples: Infinity,
|
||||
});
|
||||
mp4File.start();
|
||||
} else {
|
||||
reject(
|
||||
new Error(
|
||||
`Decoder config faile: config ${JSON.stringify(
|
||||
supportedConfig.config,
|
||||
)} is not supported`,
|
||||
),
|
||||
);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
mp4File.onSamples = async (
|
||||
_id: number,
|
||||
_user: unknown,
|
||||
samples: MP4Sample[],
|
||||
) => {
|
||||
for (const sample of samples) {
|
||||
globalSamples.push(sample);
|
||||
decoder.decode(
|
||||
new EncodedVideoChunk({
|
||||
type: sample.is_sync ? 'key' : 'delta',
|
||||
timestamp: (sample.cts * 1_000_000) / sample.timescale,
|
||||
duration: (sample.duration * 1_000_000) / sample.timescale,
|
||||
data: sample.data,
|
||||
}),
|
||||
);
|
||||
}
|
||||
await decoder.flush();
|
||||
decoder.close();
|
||||
};
|
||||
|
||||
onReady(mp4File);
|
||||
});
|
||||
}
|
||||
|
||||
export function decode(
|
||||
file: File,
|
||||
onProgress: (decodedVideo: DecodedVideo) => void,
|
||||
): Promise<DecodedVideo> {
|
||||
return decodeInternal(
|
||||
file.name,
|
||||
async (mp4File: MP4File) => {
|
||||
const reader = new FileReader();
|
||||
reader.onload = function () {
|
||||
const result = this.result as MP4ArrayBuffer;
|
||||
if (result != null) {
|
||||
result.fileStart = 0;
|
||||
mp4File.appendBuffer(result);
|
||||
}
|
||||
mp4File.flush();
|
||||
};
|
||||
reader.readAsArrayBuffer(file);
|
||||
},
|
||||
onProgress,
|
||||
);
|
||||
}
|
||||
|
||||
export function decodeStream(
|
||||
fileStream: FileStream,
|
||||
onProgress: (decodedVideo: DecodedVideo) => void,
|
||||
): Promise<DecodedVideo> {
|
||||
return decodeInternal(
|
||||
'stream',
|
||||
async (mp4File: MP4File) => {
|
||||
let part = await fileStream.next();
|
||||
while (part.done === false) {
|
||||
const result = part.value.data.buffer as MP4ArrayBuffer;
|
||||
if (result != null) {
|
||||
result.fileStart = part.value.range.start;
|
||||
mp4File.appendBuffer(result);
|
||||
}
|
||||
mp4File.flush();
|
||||
part = await fileStream.next();
|
||||
}
|
||||
},
|
||||
onProgress,
|
||||
);
|
||||
}
|
139
sam2/demo/frontend/src/common/codecs/VideoEncoder.ts
Normal file
139
sam2/demo/frontend/src/common/codecs/VideoEncoder.ts
Normal file
@@ -0,0 +1,139 @@
|
||||
/**
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import {ImageFrame} from '@/common/codecs/VideoDecoder';
|
||||
import {MP4ArrayBuffer, createFile} from 'mp4box';
|
||||
|
||||
// The selection of timescale and seconds/key-frame value are
|
||||
// explained in the following docs: https://github.com/vjeux/mp4-h264-re-encode
|
||||
const TIMESCALE = 90000;
|
||||
const SECONDS_PER_KEY_FRAME = 2;
|
||||
|
||||
export function encode(
|
||||
width: number,
|
||||
height: number,
|
||||
numFrames: number,
|
||||
framesGenerator: AsyncGenerator<ImageFrame, unknown>,
|
||||
progressCallback?: (progress: number) => void,
|
||||
): Promise<MP4ArrayBuffer> {
|
||||
return new Promise((resolve, reject) => {
|
||||
let encodedFrameIndex = 0;
|
||||
let nextKeyFrameTimestamp = 0;
|
||||
let trackID: number | null = null;
|
||||
const durations: number[] = [];
|
||||
|
||||
const outputFile = createFile();
|
||||
|
||||
const encoder = new VideoEncoder({
|
||||
output(chunk, metaData) {
|
||||
const uint8 = new Uint8Array(chunk.byteLength);
|
||||
chunk.copyTo(uint8);
|
||||
|
||||
const description = metaData?.decoderConfig?.description;
|
||||
if (trackID === null) {
|
||||
trackID = outputFile.addTrack({
|
||||
width: width,
|
||||
height: height,
|
||||
timescale: TIMESCALE,
|
||||
avcDecoderConfigRecord: description,
|
||||
});
|
||||
}
|
||||
const shiftedDuration = durations.shift();
|
||||
if (shiftedDuration != null) {
|
||||
outputFile.addSample(trackID, uint8, {
|
||||
duration: getScaledDuration(shiftedDuration),
|
||||
is_sync: chunk.type === 'key',
|
||||
});
|
||||
encodedFrameIndex++;
|
||||
progressCallback?.(encodedFrameIndex / numFrames);
|
||||
}
|
||||
|
||||
if (encodedFrameIndex === numFrames) {
|
||||
resolve(outputFile.getBuffer());
|
||||
}
|
||||
},
|
||||
error(error) {
|
||||
reject(error);
|
||||
return;
|
||||
},
|
||||
});
|
||||
|
||||
const setConfigurationAndEncodeFrames = async () => {
|
||||
// The codec value was taken from the following implementation and seems
|
||||
// reasonable for our use case for now:
|
||||
// https://github.com/vjeux/mp4-h264-re-encode/blob/main/mp4box.html#L103
|
||||
|
||||
// Additional details about codecs can be found here:
|
||||
// - https://developer.mozilla.org/en-US/docs/Web/Media/Formats/codecs_parameter
|
||||
// - https://www.w3.org/TR/webcodecs-codec-registry/#video-codec-registry
|
||||
//
|
||||
// The following setting is a good compromise between output video file
|
||||
// size and quality. The latencyMode "realtime" is needed for Safari,
|
||||
// which otherwise will produce 20x larger files when in quality
|
||||
// latencyMode. Chrome does a really good job with file size even when
|
||||
// latencyMode is set to quality.
|
||||
const configuration: VideoEncoderConfig = {
|
||||
codec: 'avc1.4d0034',
|
||||
width: roundToNearestEven(width),
|
||||
height: roundToNearestEven(height),
|
||||
bitrate: 14_000_000,
|
||||
alpha: 'discard',
|
||||
bitrateMode: 'variable',
|
||||
latencyMode: 'realtime',
|
||||
};
|
||||
const supportedConfig =
|
||||
await VideoEncoder.isConfigSupported(configuration);
|
||||
if (supportedConfig.supported === true) {
|
||||
encoder.configure(configuration);
|
||||
} else {
|
||||
throw new Error(
|
||||
`Unsupported video encoder config ${JSON.stringify(supportedConfig)}`,
|
||||
);
|
||||
}
|
||||
|
||||
for await (const frame of framesGenerator) {
|
||||
const {bitmap, duration, timestamp} = frame;
|
||||
durations.push(duration);
|
||||
let keyFrame = false;
|
||||
if (timestamp >= nextKeyFrameTimestamp) {
|
||||
await encoder.flush();
|
||||
keyFrame = true;
|
||||
nextKeyFrameTimestamp = timestamp + SECONDS_PER_KEY_FRAME * 1e6;
|
||||
}
|
||||
encoder.encode(bitmap, {keyFrame});
|
||||
bitmap.close();
|
||||
}
|
||||
|
||||
await encoder.flush();
|
||||
encoder.close();
|
||||
};
|
||||
|
||||
setConfigurationAndEncodeFrames();
|
||||
});
|
||||
}
|
||||
|
||||
function getScaledDuration(rawDuration: number) {
|
||||
return rawDuration / (1_000_000 / TIMESCALE);
|
||||
}
|
||||
|
||||
function roundToNearestEven(dim: number) {
|
||||
const rounded = Math.round(dim);
|
||||
|
||||
if (rounded % 2 === 0) {
|
||||
return rounded;
|
||||
} else {
|
||||
return rounded + (rounded > dim ? -1 : 1);
|
||||
}
|
||||
}
|
50
sam2/demo/frontend/src/common/codecs/WebCodecUtils.ts
Normal file
50
sam2/demo/frontend/src/common/codecs/WebCodecUtils.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
// https://github.com/w3c/webcodecs/issues/88
|
||||
// https://issues.chromium.org/issues/40725065
|
||||
// https://webcodecs-blogpost-demo.glitch.me/
|
||||
export async function cloneFrame(frame: VideoFrame): Promise<VideoFrame> {
|
||||
const {
|
||||
codedHeight,
|
||||
codedWidth,
|
||||
colorSpace,
|
||||
displayHeight,
|
||||
displayWidth,
|
||||
format,
|
||||
timestamp,
|
||||
} = frame;
|
||||
const rect = {x: 0, y: 0, width: codedWidth, height: codedHeight};
|
||||
const data = new ArrayBuffer(frame.allocationSize({rect}));
|
||||
try {
|
||||
await frame.copyTo(data, {rect});
|
||||
} catch (error) {
|
||||
// The VideoFrame#copyTo on x64 builds on macOS fails. The workaround here
|
||||
// is to clone the frame.
|
||||
// https://stackoverflow.com/questions/77898766/inconsistent-behavior-of-webcodecs-copyto-method-across-different-browsers-an
|
||||
return frame.clone();
|
||||
}
|
||||
return new VideoFrame(data, {
|
||||
codedHeight,
|
||||
codedWidth,
|
||||
colorSpace,
|
||||
displayHeight,
|
||||
displayWidth,
|
||||
duration: frame.duration ?? undefined,
|
||||
format: format!,
|
||||
timestamp,
|
||||
visibleRect: frame.visibleRect ?? undefined,
|
||||
});
|
||||
}
|
Reference in New Issue
Block a user