Skip to content

Commit 491ee57

Browse files
committed
Render subtitles
1 parent 8a08ee7 commit 491ee57

File tree

6 files changed

+185
-18
lines changed

6 files changed

+185
-18
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ tests/*
1111
*_ALTERED*
1212
*https*
1313
*.v3
14+
*.srt
1415

1516
# OS Files
1617
.DS_Store

src/render/format.nim

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import ../av
1212
import ../util/[bar, rules]
1313
import video
1414
import audio
15+
import subtitle
1516

1617
type Priority = object
1718
index: float64
@@ -145,6 +146,32 @@ proc makeMedia*(args: mainArgs, tl: v3, outputPath: string, rules: Rules, bar: B
145146
for aEncCtx in audioEncoders:
146147
avcodec_free_context(addr aEncCtx)
147148

149+
# Setup subtitle streams
150+
var subtitleStreams: seq[ptr AVStream] = @[]
151+
var subtitleSources: seq[string] = @[]
152+
153+
if not args.sn and tl.s.len > 0:
154+
for i in 0..<tl.s.len:
155+
if tl.s[i].len > 0:
156+
# Get source file and stream index from first clip
157+
let firstClip = tl.s[i].c[0]
158+
let sourcePath = firstClip.src[]
159+
let streamIdx = firstClip.stream
160+
161+
# Open source container to get subtitle stream info
162+
let srcContainer = av.open(sourcePath)
163+
if streamIdx >= srcContainer.subtitle.len:
164+
error &"Subtitle stream {streamIdx} not found in {sourcePath}"
165+
166+
let srcStream = srcContainer.subtitle[streamIdx]
167+
168+
# Add subtitle stream to output by copying from template
169+
let sOutStream = output.addStreamFromTemplate(srcStream)
170+
subtitleStreams.add(sOutStream)
171+
subtitleSources.add(sourcePath)
172+
173+
srcContainer.close()
174+
148175
var outPacket = av_packet_alloc()
149176
if outPacket == nil:
150177
error "Could not allocate output packet"
@@ -161,6 +188,10 @@ proc makeMedia*(args: mainArgs, tl: v3, outputPath: string, rules: Rules, bar: B
161188
for aEncCtx in audioEncoders:
162189
let name = aEncCtx.codec.canonicalName
163190
encoderTitles.add (if noColor: name else: &"\e[96m{name}")
191+
for sStream in subtitleStreams:
192+
let name = $sStream.name()
193+
if name != "":
194+
encoderTitles.add (if noColor: name else: &"\e[93m{name}")
164195

165196
if noColor:
166197
title &= encoderTitles.join("+")
@@ -275,4 +306,12 @@ proc makeMedia*(args: mainArgs, tl: v3, outputPath: string, rules: Rules, bar: B
275306
output.mux(outPacket[])
276307
av_packet_unref(outPacket)
277308

309+
# Process subtitle streams
310+
if not args.sn and subtitleStreams.len > 0:
311+
for i in 0..<subtitleStreams.len:
312+
let layer = tl.s[i]
313+
let sourcePath = subtitleSources[i]
314+
let outputStream = subtitleStreams[i]
315+
remuxSubtitles(sourcePath, layer, outputStream, output, tl.tb)
316+
278317
output.close()

src/render/subtitle.nim

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import ../timeline
2+
import ../ffmpeg
3+
import ../log
4+
import ../av
5+
6+
# Simple subtitle remuxing: copy subtitle packets from source, adjusting timestamps
7+
# Note: This approach works well for text-based subtitles (SRT, ASS, WebVTT, etc.)
8+
# For bitmap subtitles (DVD/PGS), the timestamps are adjusted but the visual data remains unchanged
9+
proc remuxSubtitles*(sourcePath: string, layer: ClipLayer, outputStream: ptr AVStream,
    output: var OutputContainer, timelineTb: AVRational) =
  ## Copies subtitle packets from `sourcePath` into `outputStream` of `output`,
  ## one clip of `layer` at a time, shifting each packet's timestamps so the
  ## packet lands at its clip's position (`clip.start`) on the timeline.
  ##
  ## Only packets whose pts lies in `[clip.offset, clip.offset + clip.dur)`
  ## (converted to the source stream's timebase) are copied; packet payloads
  ## are passed through untouched. Clips whose `stream` index does not exist
  ## in the source are skipped. Does nothing when the layer has no clips.
  ##
  ## `timelineTb` is used as a frame rate here (frames per second), so one
  ## timeline unit lasts `1 / timelineTb` seconds.

  func retime(srcTs, clipSrcStart, clipStart: int64,
      tlRate, srcTick, outTick: float64): int64 =
    ## Maps a source timestamp to the output timebase.
    # Step 1: position relative to the clip's start, in source ticks.
    let relSrc = srcTs - clipSrcStart
    # Step 2: convert that to timeline units (frames).
    let relFrames = int64(float64(relSrc) * tlRate * srcTick)
    # Step 3: absolute frame position, then Step 4: frames -> output ticks.
    int64(float64(clipStart + relFrames) / (tlRate * outTick))

  if layer.c.len == 0:
    return

  # The source is opened once and shared by every clip of the layer.
  let srcContainer = av.open(sourcePath)
  defer: srcContainer.close()

  let formatCtx = srcContainer.formatContext
  let outTb = outputStream.time_base
  let tlRate = float64(timelineTb)
  let outTick = float64(outTb)

  # One reusable packet for reading; it is unref'd after every read.
  var packet = av_packet_alloc()
  if packet == nil:
    error "Could not allocate subtitle packet"
  defer: av_packet_free(addr packet)

  # True once any packet reading has started, i.e. the demuxer may no longer
  # be positioned at the beginning of the file.
  var readerMoved = false

  for clip in layer.c:
    if clip.stream >= srcContainer.subtitle.len:
      continue

    let streamIndex = srcContainer.subtitle[clip.stream].index
    let stream = formatCtx.streams[streamIndex]
    let srcTb = stream.time_base
    let srcTick = float64(srcTb)

    # Clip boundaries expressed in the source stream's timebase.
    let seekPts = int64(float64(clip.offset) / (tlRate * srcTick))
    let clipEndInSrcTb = int64(float64(clip.offset + clip.dur) / (tlRate * srcTick))

    # Seek to the start of the clip. The read position is shared between
    # clips, so a clip that starts at offset 0 still has to rewind when an
    # earlier clip has already consumed packets.
    if seekPts > 0 or readerMoved:
      srcContainer.seek(seekPts, backward = true, stream = stream)
    readerMoved = true

    # Read and copy packets for this clip
    while av_read_frame(formatCtx, packet) >= 0:
      defer: av_packet_unref(packet)

      if packet.stream_index != streamIndex:
        continue
      if packet.pts == AV_NOPTS_VALUE:
        continue
      if packet.pts >= clipEndInSrcTb:
        # We've passed the end of this clip
        break
      if packet.pts < seekPts:
        # The backward seek may land before the clip; drop earlier packets.
        continue

      var outPacket: AVPacket
      if av_packet_ref(addr outPacket, packet) < 0:
        error "Failed to reference subtitle packet"

      # av_packet_ref overwrites every property of the destination with the
      # source packet's values, so the output routing fields must be assigned
      # after it, not before.
      outPacket.time_base = outTb
      outPacket.stream_index = outputStream.index

      # Rewrite the timestamps in the output stream's timebase.
      outPacket.pts = retime(packet.pts, seekPts, int64(clip.start),
                             tlRate, srcTick, outTick)
      if packet.dts != AV_NOPTS_VALUE:
        outPacket.dts = retime(packet.dts, seekPts, int64(clip.start),
                               tlRate, srcTick, outTick)
      else:
        outPacket.dts = AV_NOPTS_VALUE

      if packet.duration != AV_NOPTS_VALUE:
        # Source ticks -> frames -> output ticks.
        outPacket.duration = int64(float64(packet.duration) * tlRate * srcTick / (tlRate * outTick))

      # Mux the packet
      output.mux(outPacket)
      av_packet_unref(addr outPacket)

src/timeline.nim

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ type v3* = object
5252
res*: (int, int)
5353
v*: seq[ClipLayer]
5454
a*: seq[ClipLayer]
55+
s*: seq[ClipLayer]
5556
effects*: seq[Action]
5657
clips2*: Option[seq[Clip2]] # Optional because tl might be non-linear.
5758

@@ -144,6 +145,7 @@ proc initLinearTimeline*(src: ptr string, tb: AvRational, bg: RGBColor, mi: Medi
144145

145146
var vspace: seq[ClipLayer] = @[]
146147
var aspace: seq[ClipLayer] = @[]
148+
var sspace: seq[ClipLayer] = @[]
147149

148150
if mi.v.len > 0:
149151
var vlayer = ClipLayer(lang: mi.v[0].lang, c: @[])
@@ -161,7 +163,15 @@ proc initLinearTimeline*(src: ptr string, tb: AvRational, bg: RGBColor, mi: Medi
161163
alayer.c.add(audioClip)
162164
aspace.add(alayer)
163165

164-
result = v3(tb: tb, v: vspace, a: aspace, bg: bg, effects: effects)
166+
for i in 0 ..< mi.s.len:
167+
var slayer = ClipLayer(lang: mi.s[i].lang, c: @[])
168+
for clip in clips:
169+
var subtitleClip = clip
170+
subtitleClip.stream = i.int32
171+
slayer.c.add(subtitleClip)
172+
sspace.add(slayer)
173+
174+
result = v3(tb: tb, v: vspace, a: aspace, s: sspace, bg: bg, effects: effects)
165175

166176
if result.timelineIsEmpty:
167177
error "Timeline is empty, nothing to do."
@@ -217,6 +227,7 @@ proc toNonLinear*(src: ptr string, tb: AvRational, bg: RGBColor, mi: MediaInfo,
217227

218228
var vspace: seq[ClipLayer] = @[]
219229
var aspace: seq[ClipLayer] = @[]
230+
var sspace: seq[ClipLayer] = @[]
220231

221232
if mi.v.len > 0:
222233
var vlayer = ClipLayer(lang: mi.v[0].lang, c: @[])
@@ -234,7 +245,15 @@ proc toNonLinear*(src: ptr string, tb: AvRational, bg: RGBColor, mi: MediaInfo,
234245
alayer.c.add(audioClip)
235246
aspace.add(alayer)
236247

237-
result = v3(tb: tb, v: vspace, a: aspace, bg: bg, clips2: some(clips2))
248+
for i in 0 ..< mi.s.len:
249+
var slayer = ClipLayer(lang: mi.s[i].lang, c: @[])
250+
for clip in clips:
251+
var subtitleClip = clip
252+
subtitleClip.stream = i.int32
253+
slayer.c.add(subtitleClip)
254+
sspace.add(slayer)
255+
256+
result = v3(tb: tb, v: vspace, a: aspace, s: sspace, bg: bg, clips2: some(clips2))
238257
result.effects = effects
239258

240259
if result.timelineIsEmpty:
@@ -275,6 +294,7 @@ proc toNonLinear2*(src: ptr string, tb: AVRational, bg: RGBColor, mi: MediaInfo,
275294

276295
var vspace: seq[ClipLayer] = @[]
277296
var aspace: seq[ClipLayer] = @[]
297+
var sspace: seq[ClipLayer] = @[]
278298

279299
if mi.v.len > 0:
280300
var vlayer = ClipLayer(lang: mi.v[0].lang, c: @[])
@@ -292,7 +312,15 @@ proc toNonLinear2*(src: ptr string, tb: AVRational, bg: RGBColor, mi: MediaInfo,
292312
alayer.c.add(audioClip)
293313
aspace.add(alayer)
294314

295-
result = v3(tb: tb, v: vspace, a: aspace, bg: bg, clips2: some(clips2))
315+
for i in 0 ..< mi.s.len:
316+
var slayer = ClipLayer(lang: mi.s[i].lang, c: @[])
317+
for clip in clips:
318+
var subtitleClip = clip
319+
subtitleClip.stream = i.int32
320+
slayer.c.add(subtitleClip)
321+
sspace.add(slayer)
322+
323+
result = v3(tb: tb, v: vspace, a: aspace, s: sspace, bg: bg, clips2: some(clips2))
296324
result.effects = effects
297325

298326
if result.timelineIsEmpty:

tests/ffwrapper.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ class AudioStream:
4141
@dataclass(slots=True, frozen=True)
4242
class SubtitleStream:
4343
codec: str
44-
ext: str
4544
lang: str | None
4645

4746

@@ -130,16 +129,11 @@ def init(self, path: str, log: Log) -> FileInfo:
130129

131130
for s in cont.streams.subtitles:
132131
codec = s.codec_context.name
133-
sub_exts = {"mov_text": "srt", "ass": "ass", "webvtt": "vtt"}
134-
ext = sub_exts.get(codec, "vtt")
135-
subtitles += (SubtitleStream(codec, ext, s.language),)
132+
subtitles += (SubtitleStream(codec, s.language),)
136133

137134
bitrate = 0 if cont.bit_rate is None else cont.bit_rate
138135
dur = 0 if cont.duration is None else cont.duration / av.time_base
139136

140137
cont.close()
141138

142139
return FileInfo(Path(path), bitrate, dur, videos, audios, subtitles)
143-
144-
def __repr__(self) -> str:
145-
return f"@{self.path.name}"

tests/test.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -332,17 +332,33 @@ def test_premiere_named_export(self) -> None:
332332
self.main(["example.mp4"], ["--export", 'premiere:name="Foo Bar"'])
333333

334334
def test_export_subtitles(self) -> None:
335-
raise SkipTest() # TODO
336-
# cn = fileinfo(self.main(["resources/mov_text.mp4"], [], "movtext_out.mp4"))
335+
# Test mov_text subtitle stream handling
336+
cn = fileinfo(self.main(["resources/mov_text.mp4"], [], "movtext_out.mp4"))
337337

338-
# assert len(cn.videos) == 1
339-
# assert len(cn.audios) == 1
340-
# assert len(cn.subtitles) == 1
338+
assert len(cn.videos) == 1, "Should have exactly 1 video stream"
339+
assert len(cn.audios) == 1, "Should have exactly 1 audio stream"
340+
assert len(cn.subtitles) == 1, "Should have exactly 1 subtitle stream"
341+
assert cn.subtitles[0].codec == "mov_text", f"Expected mov_text codec, got {cn.subtitles[0].codec}"
341342

343+
# Test WebVTT subtitle stream handling
342344
cn = fileinfo(self.main(["resources/webvtt.mkv"], [], "webvtt_out.mkv"))
343-
assert len(cn.videos) == 1
344-
assert len(cn.audios) == 1
345-
assert len(cn.subtitles) == 1
345+
assert len(cn.videos) == 1, "Should have exactly 1 video stream"
346+
assert len(cn.audios) == 1, "Should have exactly 1 audio stream"
347+
assert len(cn.subtitles) == 1, "Should have exactly 1 subtitle stream"
348+
assert cn.subtitles[0].codec == "webvtt", f"Expected webvtt codec, got {cn.subtitles[0].codec}"
349+
350+
def test_sn(self) -> None:
    """Check that the ``-sn`` flag produces output without subtitle streams.

    Each listed input is rendered with ``-sn``; the result must still hold
    exactly one video stream and one audio stream, and no subtitle streams.
    """
    cases = (
        ("resources/mov_text.mp4", "movtext_nosub.mp4"),
        ("resources/webvtt.mkv", "webvtt_nosub.mkv"),
    )
    for source, target in cases:
        cn = fileinfo(self.main([source], ["-sn"], target))
        assert len(cn.videos) == 1, "Should have exactly 1 video stream"
        assert len(cn.audios) == 1, "Should have exactly 1 audio stream"
        assert len(cn.subtitles) == 0, "Should have 0 subtitle streams when -sn flag is used"
346362

347363
def test_scale(self) -> None:
348364
cn = fileinfo(self.main(["example.mp4"], ["--scale", "1.5"], "scale.mp4"))

0 commit comments

Comments
 (0)