diff --git a/src/invidious.cr b/src/invidious.cr index 18ec0b97..3a20b0d8 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -27,6 +27,7 @@ require "yaml" require "compress/zip" require "protodec/utils" require "./invidious/helpers/*" +require "./invidious/yt_backend/*" require "./invidious/*" require "./invidious/channels/*" require "./invidious/routes/**" diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr index 968062d6..c01ca11e 100644 --- a/src/invidious/helpers/helpers.cr +++ b/src/invidious/helpers/helpers.cr @@ -251,43 +251,6 @@ def html_to_content(description_html : String) return description end -def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) - extracted = extract_items(initial_data, author_fallback, author_id_fallback) - - target = [] of SearchItem - extracted.each do |i| - if i.is_a?(Category) - i.contents.each { |cate_i| target << cate_i if !cate_i.is_a? Video } - else - target << i - end - end - return target.select(&.is_a?(SearchVideo)).map(&.as(SearchVideo)) -end - -def extract_selected_tab(tabs) - # Extract the selected tab from the array of tabs Youtube returns - return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]["tabRenderer"] -end - -def fetch_continuation_token(items : Array(JSON::Any)) - # Fetches the continuation token from an array of items - return items.last["continuationItemRenderer"]? - .try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s -end - -def fetch_continuation_token(initial_data : Hash(String, JSON::Any)) - # Fetches the continuation token from initial data - if initial_data["onResponseReceivedActions"]? - continuation_items = initial_data["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"] - else - tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]) - continuation_items = tab["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]["items"] - end - - return fetch_continuation_token(continuation_items.as_a) -end - def check_enum(db, enum_name, struct_type = nil) return # TODO diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr index 68ba76f9..65067526 100644 --- a/src/invidious/helpers/utils.cr +++ b/src/invidious/helpers/utils.cr @@ -1,70 +1,5 @@ -require "lsquic" require "db" -def add_yt_headers(request) - request.headers["user-agent"] ||= "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36" - request.headers["accept-charset"] ||= "ISO-8859-1,utf-8;q=0.7,*;q=0.7" - request.headers["accept"] ||= "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" - request.headers["accept-language"] ||= "en-us,en;q=0.5" - return if request.resource.starts_with? "/sorry/index" - request.headers["x-youtube-client-name"] ||= "1" - request.headers["x-youtube-client-version"] ||= "2.20200609" - # Preserve original cookies and add new YT consent cookie for EU servers - request.headers["cookie"] = "#{request.headers["cookie"]?}; CONSENT=YES+" - if !CONFIG.cookies.empty? - request.headers["cookie"] = "#{(CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }).join("; ")}; #{request.headers["cookie"]?}" - end -end - -struct YoutubeConnectionPool - property! url : URI - property! capacity : Int32 - property! timeout : Float64 - property pool : DB::Pool(QUIC::Client | HTTP::Client) - - def initialize(url : URI, @capacity = 5, @timeout = 5.0, use_quic = true) - @url = url - @pool = build_pool(use_quic) - end - - def client(region = nil, &block) - if region - conn = make_client(url, region) - response = yield conn - else - conn = pool.checkout - begin - response = yield conn - rescue ex - conn.close - conn = QUIC::Client.new(url) - conn.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::INET - conn.family = Socket::Family::INET if conn.family == Socket::Family::UNSPEC - conn.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com" - response = yield conn - ensure - pool.release(conn) - end - end - - response - end - - private def build_pool(use_quic) - DB::Pool(QUIC::Client | HTTP::Client).new(initial_pool_size: 0, max_pool_size: capacity, max_idle_pool_size: capacity, checkout_timeout: timeout) do - if use_quic - conn = QUIC::Client.new(url) - else - conn = HTTP::Client.new(url) - end - conn.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::INET - conn.family = Socket::Family::INET if conn.family == Socket::Family::UNSPEC - conn.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com" - conn - end - end -end - # See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html def ci_lower_bound(pos, n) if n == 0 @@ -85,37 +20,6 @@ def elapsed_text(elapsed) "#{(millis * 1000).round(2)}µs" end -def make_client(url : URI, region = nil) - # TODO: Migrate any applicable endpoints to QUIC - client = HTTPClient.new(url, OpenSSL::SSL::Context::Client.insecure) - client.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::UNSPEC - client.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com" - client.read_timeout = 10.seconds - client.connect_timeout = 10.seconds - - if region - PROXY_LIST[region]?.try &.sample(40).each do |proxy| - begin - proxy = HTTPProxy.new(proxy_host: proxy[:ip], proxy_port: proxy[:port]) - client.set_proxy(proxy) - break - rescue ex - end - end - end - - return client -end - -def make_client(url : URI, region = nil, &block) - client = make_client(url, region) - begin - yield client - ensure - client.close - end -end - def decode_length_seconds(string) length_seconds = string.gsub(/[^0-9:]/, "").split(":").map &.to_i length_seconds = [0] * (3 - length_seconds.size) + length_seconds diff --git a/src/invidious/yt_backend/connection_pool.cr b/src/invidious/yt_backend/connection_pool.cr new file mode 100644 index 00000000..5ba2d73c --- /dev/null +++ b/src/invidious/yt_backend/connection_pool.cr @@ -0,0 +1,96 @@ +require "lsquic" + +def add_yt_headers(request) + request.headers["user-agent"] ||= "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36" + request.headers["accept-charset"] ||= "ISO-8859-1,utf-8;q=0.7,*;q=0.7" + request.headers["accept"] ||= "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" + request.headers["accept-language"] ||= "en-us,en;q=0.5" + return if request.resource.starts_with? "/sorry/index" + request.headers["x-youtube-client-name"] ||= "1" + request.headers["x-youtube-client-version"] ||= "2.20200609" + # Preserve original cookies and add new YT consent cookie for EU servers + request.headers["cookie"] = "#{request.headers["cookie"]?}; CONSENT=YES+" + if !CONFIG.cookies.empty? + request.headers["cookie"] = "#{(CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }).join("; ")}; #{request.headers["cookie"]?}" + end +end + +struct YoutubeConnectionPool + property! url : URI + property! capacity : Int32 + property! timeout : Float64 + property pool : DB::Pool(QUIC::Client | HTTP::Client) + + def initialize(url : URI, @capacity = 5, @timeout = 5.0, use_quic = true) + @url = url + @pool = build_pool(use_quic) + end + + def client(region = nil, &block) + if region + conn = make_client(url, region) + response = yield conn + else + conn = pool.checkout + begin + response = yield conn + rescue ex + conn.close + conn = QUIC::Client.new(url) + conn.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::INET + conn.family = Socket::Family::INET if conn.family == Socket::Family::UNSPEC + conn.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com" + response = yield conn + ensure + pool.release(conn) + end + end + + response + end + + private def build_pool(use_quic) + DB::Pool(QUIC::Client | HTTP::Client).new(initial_pool_size: 0, max_pool_size: capacity, max_idle_pool_size: capacity, checkout_timeout: timeout) do + if use_quic + conn = QUIC::Client.new(url) + else + conn = HTTP::Client.new(url) + end + conn.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::INET + conn.family = Socket::Family::INET if conn.family == Socket::Family::UNSPEC + conn.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com" + conn + end + end +end + +def make_client(url : URI, region = nil) + # TODO: Migrate any applicable endpoints to QUIC + client = HTTPClient.new(url, OpenSSL::SSL::Context::Client.insecure) + client.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::UNSPEC + client.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com" + client.read_timeout = 10.seconds + client.connect_timeout = 10.seconds + + if region + PROXY_LIST[region]?.try &.sample(40).each do |proxy| + begin + proxy = HTTPProxy.new(proxy_host: proxy[:ip], proxy_port: proxy[:port]) + client.set_proxy(proxy) + break + rescue ex + end + end + end + + return client +end + +def make_client(url : URI, region = nil, &block) + client = make_client(url, region) + begin + yield client + ensure + client.close + end +end diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/yt_backend/extractors.cr similarity index 96% rename from src/invidious/helpers/extractors.cr rename to src/invidious/yt_backend/extractors.cr index 0277d43b..8398ca8e 100644 --- a/src/invidious/helpers/extractors.cr +++ b/src/invidious/yt_backend/extractors.cr @@ -533,37 +533,6 @@ private module HelperExtractors end end -# Extracts text from InnerTube response -# -# InnerTube can package text in three different formats -# "runs": [ -# {"text": "something"}, -# {"text": "cont"}, -# ... -# ] -# -# "SimpleText": "something" -# -# Or sometimes just none at all as with the data returned from -# category continuations. -# -# In order to facilitate calling this function with `#[]?`: -# A nil will be accepted. Of course, since nil cannot be parsed, -# another nil will be returned. -def extract_text(item : JSON::Any?) : String? - if item.nil? - return nil - end - - if text_container = item["simpleText"]? - return text_container.as_s - elsif text_container = item["runs"]? - return text_container.as_a.map(&.["text"].as_s).join("") - else - nil - end -end - # Parses an item from Youtube's JSON response into a more usable structure. # The end result can either be a SearchVideo, SearchPlaylist or SearchChannel. def extract_item(item : JSON::Any, author_fallback : String? = "", diff --git a/src/invidious/yt_backend/extractors_utils.cr b/src/invidious/yt_backend/extractors_utils.cr new file mode 100644 index 00000000..97cc0997 --- /dev/null +++ b/src/invidious/yt_backend/extractors_utils.cr @@ -0,0 +1,67 @@ +# Extracts text from InnerTube response +# +# InnerTube can package text in three different formats +# "runs": [ +# {"text": "something"}, +# {"text": "cont"}, +# ... +# ] +# +# "SimpleText": "something" +# +# Or sometimes just none at all as with the data returned from +# category continuations. +# +# In order to facilitate calling this function with `#[]?`: +# A nil will be accepted. Of course, since nil cannot be parsed, +# another nil will be returned. +def extract_text(item : JSON::Any?) : String? + if item.nil? + return nil + end + + if text_container = item["simpleText"]? + return text_container.as_s + elsif text_container = item["runs"]? + return text_container.as_a.map(&.["text"].as_s).join("") + else + nil + end +end + +def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) + extracted = extract_items(initial_data, author_fallback, author_id_fallback) + + target = [] of SearchItem + extracted.each do |i| + if i.is_a?(Category) + i.contents.each { |cate_i| target << cate_i if !cate_i.is_a? Video } + else + target << i + end + end + return target.select(&.is_a?(SearchVideo)).map(&.as(SearchVideo)) +end + +def extract_selected_tab(tabs) + # Extract the selected tab from the array of tabs Youtube returns + return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]["tabRenderer"] +end + +def fetch_continuation_token(items : Array(JSON::Any)) + # Fetches the continuation token from an array of items + return items.last["continuationItemRenderer"]? + .try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s +end + +def fetch_continuation_token(initial_data : Hash(String, JSON::Any)) + # Fetches the continuation token from initial data + if initial_data["onResponseReceivedActions"]? + continuation_items = initial_data["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"] + else + tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]) + continuation_items = tab["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]["items"] + end + + return fetch_continuation_token(continuation_items.as_a) +end diff --git a/src/invidious/helpers/proxy.cr b/src/invidious/yt_backend/proxy.cr similarity index 100% rename from src/invidious/helpers/proxy.cr rename to src/invidious/yt_backend/proxy.cr diff --git a/src/invidious/helpers/youtube_api.cr b/src/invidious/yt_backend/youtube_api.cr similarity index 100% rename from src/invidious/helpers/youtube_api.cr rename to src/invidious/yt_backend/youtube_api.cr