diff --git a/src/recording/composer.ts b/src/recording/composer.ts index a5bdc61..952c40f 100644 --- a/src/recording/composer.ts +++ b/src/recording/composer.ts @@ -60,6 +60,11 @@ type VideoTimelineSegment = { }; const jobs: Map = new Map(); +const COMPOSITION_OUTPUT_WIDTH = 2560; +const COMPOSITION_OUTPUT_HEIGHT = 1440; +const COMPOSITION_OUTPUT_FPS = 60; +const COMPOSITION_HOST_HEIGHT = 1080; +const COMPOSITION_VIDEO_BITRATE = '16000k'; function nowIso(): string { return new Date().toISOString(); @@ -207,6 +212,19 @@ function formatSeconds(value: number): string { return value.toFixed(3).replace(/\.?0+$/, ''); } +function getDurationBoundVideoFilters(segmentDurationSeconds: number | null): string[] { + if (segmentDurationSeconds === null) { + return []; + } + + const duration = formatSeconds(segmentDurationSeconds); + return [ + `tpad=stop_mode=clone:stop_duration=${duration}`, + `trim=duration=${duration}`, + 'setpts=PTS-STARTPTS' + ]; +} + function getBottomTileWidth(index: number, inputCount: number, outputWidth: number): number { const sideCount = inputCount - 1; if (sideCount <= 1) { @@ -304,9 +322,10 @@ export function buildFfmpegCompositionArgs(input: { outputPath: string; format: string; }): string[] { - const outputWidth = 1280; - const outputHeight = 720; - const hostHeight = 540; + const outputWidth = COMPOSITION_OUTPUT_WIDTH; + const outputHeight = COMPOSITION_OUTPUT_HEIGHT; + const outputFps = COMPOSITION_OUTPUT_FPS; + const hostHeight = COMPOSITION_HOST_HEIGHT; const bottomHeight = outputHeight - hostHeight; const videoInputs = orderVideoInputsForComposition(input.videoInputs); const timelineOriginMs = getTimelineOriginMs(videoInputs.concat(input.audioInputs)); @@ -343,7 +362,7 @@ export function buildFfmpegCompositionArgs(input: { if (segmentDurationSeconds === null) { return; } - filters.push(`color=color=black:size=${outputWidth}x${outputHeight}:rate=30:duration=${formatSeconds(segmentDurationSeconds)},format=yuv420p[seg${segmentIndex}]`); + filters.push(`color=color=black:size=${outputWidth}x${outputHeight}:rate=${outputFps}:duration=${formatSeconds(segmentDurationSeconds)},format=yuv420p[seg${segmentIndex}]`); return; } @@ -365,16 +384,24 @@ export function buildFfmpegCompositionArgs(input: { if (segmentDurationSeconds !== null) { trimOptions.push(`duration=${formatSeconds(segmentDurationSeconds)}`); } - filters.push(`[${inputLabel}]trim=${trimOptions.join(':')},setpts=PTS-STARTPTS,scale=${width}:${height}:force_original_aspect_ratio=decrease,pad=${width}:${height}:(ow-iw)/2:(oh-ih)/2:black,setsar=1[seg${segmentIndex}v${activeIndex}]`); + const videoFilters = [ + `trim=${trimOptions.join(':')}`, + 'setpts=PTS-STARTPTS', + ...getDurationBoundVideoFilters(segmentDurationSeconds), + `scale=${width}:${height}:force_original_aspect_ratio=decrease`, + `pad=${width}:${height}:(ow-iw)/2:(oh-ih)/2:black`, + 'setsar=1' + ]; + filters.push(`[${inputLabel}]${videoFilters.join(',')}[seg${segmentIndex}v${activeIndex}]`); }); if (segment.activeInputs.length === 1) { - filters.push(`[seg${segmentIndex}v0]fps=30,format=yuv420p[seg${segmentIndex}]`); + filters.push(`[seg${segmentIndex}v0]fps=${outputFps},format=yuv420p[seg${segmentIndex}]`); return; } const segmentVideoLabels = segment.activeInputs.map((_file, activeIndex) => `[seg${segmentIndex}v${activeIndex}]`).join(''); - filters.push(`${segmentVideoLabels}xstack=inputs=${segment.activeInputs.length}:layout=${createHostBottomLayout(segment.activeInputs.length, outputWidth, hostHeight)}:fill=black,fps=30,format=yuv420p[seg${segmentIndex}]`); + filters.push(`${segmentVideoLabels}xstack=inputs=${segment.activeInputs.length}:layout=${createHostBottomLayout(segment.activeInputs.length, outputWidth, hostHeight)}:fill=black,fps=${outputFps},format=yuv420p[seg${segmentIndex}]`); }); if (videoSegments.length === 1) { @@ -388,7 +415,7 @@ export function buildFfmpegCompositionArgs(input: { const audioInputIndex = videoInputs.length; const offsetMs = Math.round(getInputOffsetSeconds(input.audioInputs[0], timelineOriginMs) * 1000); const offsetFilter = offsetMs > 1 ? `,adelay=${offsetMs}:all=1` : ''; - filters.push(`[${audioInputIndex}:a]aresample=async=1:first_pts=0${offsetFilter}[aout]`); + filters.push(`[${audioInputIndex}:a]aresample=async=1:first_pts=0${offsetFilter},asetpts=N/SR/TB[aout]`); } else if (input.audioInputs.length > 1) { const audioLabels = input.audioInputs.map((file, index) => { const audioInputIndex = videoInputs.length + index; @@ -397,7 +424,7 @@ export function buildFfmpegCompositionArgs(input: { filters.push(`[${audioInputIndex}:a]aresample=async=1:first_pts=0${offsetFilter}[a${index}]`); return `[a${index}]`; }).join(''); - filters.push(`${audioLabels}amix=inputs=${input.audioInputs.length}:duration=longest:dropout_transition=2[aout]`); + filters.push(`${audioLabels}amix=inputs=${input.audioInputs.length}:duration=longest:dropout_transition=2,asetpts=N/SR/TB[aout]`); } args.push('-filter_complex', filters.join(';'), '-map', '[vout]'); @@ -406,12 +433,12 @@ export function buildFfmpegCompositionArgs(input: { } if (input.format === 'mp4') { - args.push('-c:v', 'libx264', '-preset', 'veryfast', '-pix_fmt', 'yuv420p'); + args.push('-c:v', 'libx264', '-preset', 'veryfast', '-pix_fmt', 'yuv420p', '-b:v', COMPOSITION_VIDEO_BITRATE, '-r', String(outputFps)); if (input.audioInputs.length > 0) { args.push('-c:a', 'aac'); } } else { - args.push('-c:v', 'libvpx-vp9', '-deadline', 'realtime', '-cpu-used', '4'); + args.push('-c:v', 'libvpx-vp9', '-deadline', 'good', '-cpu-used', '4', '-b:v', COMPOSITION_VIDEO_BITRATE, '-r', String(outputFps)); if (input.audioInputs.length > 0) { args.push('-c:a', 'libopus'); } diff --git a/src/recording/werift-adapter.ts b/src/recording/werift-adapter.ts index c1c9040..f864d30 100644 --- a/src/recording/werift-adapter.ts +++ b/src/recording/werift-adapter.ts @@ -20,6 +20,10 @@ const werift = require('werift'); const RTCPeerConnection = werift.RTCPeerConnection; const weriftNonstandard = require('werift/nonstandard'); const MediaRecorder = weriftNonstandard.MediaRecorder; +const SERVER_RECORDING_WIDTH = 2560; +const SERVER_RECORDING_HEIGHT = 1440; +const SERVER_RECORDING_JITTER_BUFFER_LATENCY_MS = 1000; +const SERVER_RECORDING_JITTER_BUFFER_SIZE = 50000; type RecordingPeerState = { pc: WeriftPeerConnection; @@ -146,9 +150,13 @@ function startTrackRecorder(input: { const recorder = new MediaRecorder({ path: target.filePath, tracks: [input.track], - width: 1280, - height: 720, + width: SERVER_RECORDING_WIDTH, + height: SERVER_RECORDING_HEIGHT, disableLipSync: true, + jitterBuffer: { + latency: SERVER_RECORDING_JITTER_BUFFER_LATENCY_MS, + bufferSize: SERVER_RECORDING_JITTER_BUFFER_SIZE + }, defaultDuration: 24 * 60 * 60 }); diff --git a/test/recording-composer.test.ts b/test/recording-composer.test.ts index ecbdb7a..8c02868 100644 --- a/test/recording-composer.test.ts +++ b/test/recording-composer.test.ts @@ -43,12 +43,14 @@ describe('recording composer', () => { expect(args).toContain('-filter_complex'); expect(args.join(' ')).toContain('xstack=inputs=2'); - expect(args.join(' ')).toContain('scale=1280:540'); - expect(args.join(' ')).toContain('scale=1280:180'); - expect(args.join(' ')).toContain('layout=0_0|0_540'); + expect(args.join(' ')).toContain('scale=2560:1080'); + expect(args.join(' ')).toContain('scale=2560:360'); + expect(args.join(' ')).toContain('layout=0_0|0_1080'); + expect(args.join(' ')).toContain('fps=60'); expect(args.join(' ')).toContain('amix=inputs=2'); expect(args).toContain('libvpx-vp9'); expect(args).toContain('libopus'); + expect(args).toContain('16000k'); expect(args).not.toContain('-shortest'); expect(args[args.length - 1]).toBe('recordings/room-1/output.webm'); }); @@ -75,9 +77,9 @@ describe('recording composer', () => { '-i', 'recordings/room-1/p2-video.webm' ]); - expect(filter).toContain('scale=1280:540'); - expect(filter).toContain('scale=640:180'); - expect(filter).toContain('layout=0_0|0_540|640_540'); + expect(filter).toContain('scale=2560:1080'); + expect(filter).toContain('scale=1280:360'); + expect(filter).toContain('layout=0_0|0_1080|1280_1080'); }); test('builds mp4 encoder args', () => { @@ -90,6 +92,8 @@ describe('recording composer', () => { expect(args).toContain('libx264'); expect(args).toContain('-pix_fmt'); + expect(args).toContain('16000k'); + expect(args).toContain('60'); expect(args).not.toContain('libopus'); }); @@ -128,12 +132,31 @@ describe('recording composer', () => { const filter = args[args.indexOf('-filter_complex') + 1]; expect(filter).toContain('[0:v]split=2[vin0_0][vin0_1]'); expect(filter).toContain('[vin0_0]trim=start=0:duration=2.5'); + expect(filter).toContain('tpad=stop_mode=clone:stop_duration=2.5,trim=duration=2.5'); expect(filter).toContain('[vin0_1]trim=start=2.5:duration=7.5'); + expect(filter).toContain('tpad=stop_mode=clone:stop_duration=7.5,trim=duration=7.5'); expect(filter).toContain('[1:v]trim=start=0:duration=7.5'); expect(filter).toContain('concat=n=2:v=1:a=0[vout]'); expect(filter).toContain('[2:a]aresample=async=1:first_pts=0[a0]'); expect(filter).toContain('[3:a]aresample=async=1:first_pts=0,adelay=2500:all=1[a1]'); - expect(filter).toContain('[a0][a1]amix=inputs=2:duration=longest'); + expect(filter).toContain('[a0][a1]amix=inputs=2:duration=longest:dropout_transition=2,asetpts=N/SR/TB[aout]'); + }); + + test('bounds each video segment to its timeline duration before composition', () => { + const args = buildFfmpegCompositionArgs({ + videoInputs: [ + file('host-video.webm', 'video', 'host', 'host', '2026-06-01T00:00:00.000Z', '2026-06-01T00:00:24.000Z') + ], + audioInputs: [ + file('host-audio.webm', 'audio', 'host', 'host', '2026-06-01T00:00:00.000Z', '2026-06-01T00:00:24.000Z') + ], + outputPath: 'recordings/room-1/output.webm', + format: 'webm' + }); + + const filter = args[args.indexOf('-filter_complex') + 1]; + expect(filter).toContain('trim=start=0:duration=24,setpts=PTS-STARTPTS,tpad=stop_mode=clone:stop_duration=24,trim=duration=24,setpts=PTS-STARTPTS'); + expect(filter).toContain('[1:a]aresample=async=1:first_pts=0,asetpts=N/SR/TB[aout]'); }); test('changes the layout when participants join and leave without overlapping', () => { @@ -150,7 +173,7 @@ describe('recording composer', () => { const filter = args[args.indexOf('-filter_complex') + 1]; expect(filter).toContain('xstack=inputs=2'); - expect(filter).toContain('layout=0_0|0_540'); + expect(filter).toContain('layout=0_0|0_1080'); expect(filter).toContain('[0:v]split=2[vin0_0][vin0_1]'); expect(filter).toContain('[vin0_0]trim=start=0:duration=5'); expect(filter).toContain('[1:v]trim=start=0:duration=5'); @@ -174,6 +197,6 @@ describe('recording composer', () => { const filter = args[args.indexOf('-filter_complex') + 1]; expect(filter).toContain('xstack=inputs=3'); - expect(filter).toContain('layout=0_0|0_540|640_540'); + expect(filter).toContain('layout=0_0|0_1080|1280_1080'); }); });