diff --git a/spec/helpers/vtt/builder_spec.cr b/spec/helpers/vtt/builder_spec.cr new file mode 100644 index 00000000..7b543ddc --- /dev/null +++ b/spec/helpers/vtt/builder_spec.cr @@ -0,0 +1,64 @@ +require "../../spec_helper.cr" + +MockLines = [ + { + "start_time": Time::Span.new(seconds: 1), + "end_time": Time::Span.new(seconds: 2), + "text": "Line 1", + }, + + { + "start_time": Time::Span.new(seconds: 2), + "end_time": Time::Span.new(seconds: 3), + "text": "Line 2", + }, +] + +Spectator.describe "WebVTT::Builder" do + it "correctly builds a vtt file" do + result = WebVTT.build do |vtt| + MockLines.each do |line| + vtt.cue(line["start_time"], line["end_time"], line["text"]) + end + end + + expect(result).to eq([ + "WEBVTT", + "", + "00:00:01.000 --> 00:00:02.000", + "Line 1", + "", + "00:00:02.000 --> 00:00:03.000", + "Line 2", + "", + "", + ].join('\n')) + end + + it "correctly builds a vtt file with setting fields" do + setting_fields = { + "Kind" => "captions", + "Language" => "en", + } + + result = WebVTT.build(setting_fields) do |vtt| + MockLines.each do |line| + vtt.cue(line["start_time"], line["end_time"], line["text"]) + end + end + + expect(result).to eq([ + "WEBVTT", + "Kind: captions", + "Language: en", + "", + "00:00:01.000 --> 00:00:02.000", + "Line 1", + "", + "00:00:02.000 --> 00:00:03.000", + "Line 2", + "", + "", + ].join('\n')) + end +end diff --git a/src/invidious/helpers/webvtt.cr b/src/invidious/helpers/webvtt.cr new file mode 100644 index 00000000..56f761ed --- /dev/null +++ b/src/invidious/helpers/webvtt.cr @@ -0,0 +1,67 @@ +# Namespace for logic relating to generating WebVTT files +# +# Probably not compliant to WebVTT's specs but it is enough for Invidious. +module WebVTT + # A WebVTT builder generates WebVTT files + private class Builder + def initialize(@io : IO) + end + + # Writes an vtt cue with the specified time stamp and contents + def cue(start_time : Time::Span, end_time : Time::Span, text : String) + timestamp(start_time, end_time) + @io << text + @io << "\n\n" + end + + private def timestamp(start_time : Time::Span, end_time : Time::Span) + timestamp_component(start_time) + @io << " --> " + timestamp_component(end_time) + + @io << '\n' + end + + private def timestamp_component(timestamp : Time::Span) + @io << timestamp.hours.to_s.rjust(2, '0') + @io << ':' << timestamp.minutes.to_s.rjust(2, '0') + @io << ':' << timestamp.seconds.to_s.rjust(2, '0') + @io << '.' << timestamp.milliseconds.to_s.rjust(3, '0') + end + + def document(setting_fields : Hash(String, String)? = nil, &) + @io << "WEBVTT\n" + + if setting_fields + setting_fields.each do |name, value| + @io << name << ": " << value << '\n' + end + end + + @io << '\n' + + yield + end + end + + # Returns the resulting `String` of writing WebVTT to the yielded `WebVTT::Builder` + # + # ``` + # string = WebVTT.build do |vtt| + # vtt.cue(Time::Span.new(seconds: 1), Time::Span.new(seconds: 2), "Line 1") + # vtt.cue(Time::Span.new(seconds: 2), Time::Span.new(seconds: 3), "Line 2") + # end + # + # string # => "WEBVTT\n\n00:00:01.000 --> 00:00:02.000\nLine 1\n\n00:00:02.000 --> 00:00:03.000\nLine 2\n\n" + # ``` + # + # Accepts an optional settings fields hash to add settings attribute to the resulting vtt file. + def self.build(setting_fields : Hash(String, String)? = nil, &) + String.build do |str| + builder = Builder.new(str) + builder.document(setting_fields) do + yield builder + end + end + end +end diff --git a/src/invidious/routes/api/v1/videos.cr b/src/invidious/routes/api/v1/videos.cr index 25e766d2..449c9f9b 100644 --- a/src/invidious/routes/api/v1/videos.cr +++ b/src/invidious/routes/api/v1/videos.cr @@ -101,20 +101,17 @@ module Invidious::Routes::API::V1::Videos if caption.name.includes? "auto-generated" caption_xml = YT_POOL.client &.get(url).body + settings_field = { + "Kind" => "captions", + "Language" => "#{tlang || caption.language_code}", + } + if caption_xml.starts_with?("/, "") text = text.gsub(/<\/font>/, "") @@ -137,12 +131,7 @@ module Invidious::Routes::API::V1::Videos text = "#{md["text"]}" end - str << <<-END_CUE - #{start_time} --> #{end_time} - #{text} - - - END_CUE + webvtt.cue(start_time, end_time, text) end end end @@ -215,11 +204,7 @@ module Invidious::Routes::API::V1::Videos storyboard = storyboard[0] end - String.build do |str| - str << <<-END_VTT - WEBVTT - END_VTT - + WebVTT.build do |vtt| start_time = 0.milliseconds end_time = storyboard[:interval].milliseconds @@ -231,12 +216,8 @@ module Invidious::Routes::API::V1::Videos storyboard[:storyboard_height].times do |j| storyboard[:storyboard_width].times do |k| - str << <<-END_CUE - #{start_time}.000 --> #{end_time}.000 - #{url}#xywh=#{storyboard[:width] * k},#{storyboard[:height] * j},#{storyboard[:width] - 2},#{storyboard[:height]} - - - END_CUE + current_cue_url = "#{url}#xywh=#{storyboard[:width] * k},#{storyboard[:height] * j},#{storyboard[:width] - 2},#{storyboard[:height]}" + vtt.cue(start_time, end_time, current_cue_url) start_time += storyboard[:interval].milliseconds end_time += storyboard[:interval].milliseconds diff --git a/src/invidious/videos/caption.cr b/src/invidious/videos/caption.cr index 256dfcc0..484e61d2 100644 --- a/src/invidious/videos/caption.cr +++ b/src/invidious/videos/caption.cr @@ -52,17 +52,13 @@ module Invidious::Videos break end end - result = String.build do |result| - result << <<-END_VTT - WEBVTT - Kind: captions - Language: #{tlang || @language_code} + settings_field = { + "Kind" => "captions", + "Language" => "#{tlang || @language_code}", + } - END_VTT - - result << "\n\n" - + result = WebVTT.build(settings_field) do |vtt| cues.each_with_index do |node, i| start_time = node["t"].to_f.milliseconds @@ -76,29 +72,16 @@ module Invidious::Videos end_time = start_time + duration end - # start_time - result << start_time.hours.to_s.rjust(2, '0') - result << ':' << start_time.minutes.to_s.rjust(2, '0') - result << ':' << start_time.seconds.to_s.rjust(2, '0') - result << '.' << start_time.milliseconds.to_s.rjust(3, '0') - - result << " --> " - - # end_time - result << end_time.hours.to_s.rjust(2, '0') - result << ':' << end_time.minutes.to_s.rjust(2, '0') - result << ':' << end_time.seconds.to_s.rjust(2, '0') - result << '.' << end_time.milliseconds.to_s.rjust(3, '0') - - result << "\n" - - node.children.each do |s| - result << s.content + text = String.build do |io| + node.children.each do |s| + io << s.content + end end - result << "\n" - result << "\n" + + vtt.cue(start_time, end_time, text) end end + return result end end diff --git a/src/invidious/videos/transcript.cr b/src/invidious/videos/transcript.cr index f3360a52..dac00eea 100644 --- a/src/invidious/videos/transcript.cr +++ b/src/invidious/videos/transcript.cr @@ -34,41 +34,15 @@ module Invidious::Videos # Convert into array of TranscriptLine lines = self.parse(initial_data) + settings_field = { + "Kind" => "captions", + "Language" => target_language, + } + # Taken from Invidious::Videos::Captions::Metadata.timedtext_to_vtt() - vtt = String.build do |vtt| - vtt << <<-END_VTT - WEBVTT - Kind: captions - Language: #{target_language} - - - END_VTT - - vtt << "\n\n" - + vtt = WebVTT.build(settings_field) do |vtt| lines.each do |line| - start_time = line.start_ms - end_time = line.end_ms - - # start_time - vtt << start_time.hours.to_s.rjust(2, '0') - vtt << ':' << start_time.minutes.to_s.rjust(2, '0') - vtt << ':' << start_time.seconds.to_s.rjust(2, '0') - vtt << '.' << start_time.milliseconds.to_s.rjust(3, '0') - - vtt << " --> " - - # end_time - vtt << end_time.hours.to_s.rjust(2, '0') - vtt << ':' << end_time.minutes.to_s.rjust(2, '0') - vtt << ':' << end_time.seconds.to_s.rjust(2, '0') - vtt << '.' << end_time.milliseconds.to_s.rjust(3, '0') - - vtt << "\n" - vtt << line.line - - vtt << "\n" - vtt << "\n" + vtt.cue(line.start_ms, line.end_ms, line.line) end end