From 56dbe159435d643499ac875fe2c4283be8e61b84 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Thu, 7 Oct 2021 22:16:50 +0200 Subject: [PATCH 1/9] Move proxy & YoutubeAPI code to the yt_backend folder --- src/invidious/{helpers => yt_backend}/proxy.cr | 0 src/invidious/{helpers => yt_backend}/youtube_api.cr | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/invidious/{helpers => yt_backend}/proxy.cr (100%) rename src/invidious/{helpers => yt_backend}/youtube_api.cr (100%) diff --git a/src/invidious/helpers/proxy.cr b/src/invidious/yt_backend/proxy.cr similarity index 100% rename from src/invidious/helpers/proxy.cr rename to src/invidious/yt_backend/proxy.cr diff --git a/src/invidious/helpers/youtube_api.cr b/src/invidious/yt_backend/youtube_api.cr similarity index 100% rename from src/invidious/helpers/youtube_api.cr rename to src/invidious/yt_backend/youtube_api.cr From d300797e229e12973559334cc53a17f79a27ac90 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Thu, 7 Oct 2021 22:17:24 +0200 Subject: [PATCH 2/9] Move the YoutubeConnectionPool code to its own file --- src/invidious/helpers/utils.cr | 81 --------------------- src/invidious/yt_backend/connection_pool.cr | 81 +++++++++++++++++++++ 2 files changed, 81 insertions(+), 81 deletions(-) create mode 100644 src/invidious/yt_backend/connection_pool.cr diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr index 68ba76f9..6100d403 100644 --- a/src/invidious/helpers/utils.cr +++ b/src/invidious/helpers/utils.cr @@ -1,4 +1,3 @@ -require "lsquic" require "db" def add_yt_headers(request) @@ -16,55 +15,6 @@ def add_yt_headers(request) end end -struct YoutubeConnectionPool - property! url : URI - property! capacity : Int32 - property! timeout : Float64 - property pool : DB::Pool(QUIC::Client | HTTP::Client) - - def initialize(url : URI, @capacity = 5, @timeout = 5.0, use_quic = true) - @url = url - @pool = build_pool(use_quic) - end - - def client(region = nil, &block) - if region - conn = make_client(url, region) - response = yield conn - else - conn = pool.checkout - begin - response = yield conn - rescue ex - conn.close - conn = QUIC::Client.new(url) - conn.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::INET - conn.family = Socket::Family::INET if conn.family == Socket::Family::UNSPEC - conn.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com" - response = yield conn - ensure - pool.release(conn) - end - end - - response - end - - private def build_pool(use_quic) - DB::Pool(QUIC::Client | HTTP::Client).new(initial_pool_size: 0, max_pool_size: capacity, max_idle_pool_size: capacity, checkout_timeout: timeout) do - if use_quic - conn = QUIC::Client.new(url) - else - conn = HTTP::Client.new(url) - end - conn.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::INET - conn.family = Socket::Family::INET if conn.family == Socket::Family::UNSPEC - conn.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com" - conn - end - end -end - # See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html def ci_lower_bound(pos, n) if n == 0 @@ -85,37 +35,6 @@ def elapsed_text(elapsed) "#{(millis * 1000).round(2)}µs" end -def make_client(url : URI, region = nil) - # TODO: Migrate any applicable endpoints to QUIC - client = HTTPClient.new(url, OpenSSL::SSL::Context::Client.insecure) - client.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::UNSPEC - client.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com" - client.read_timeout = 10.seconds - client.connect_timeout = 10.seconds - - if region - PROXY_LIST[region]?.try &.sample(40).each do |proxy| - begin - proxy = HTTPProxy.new(proxy_host: proxy[:ip], proxy_port: proxy[:port]) - client.set_proxy(proxy) - break - rescue ex - end - end - end - - return client -end - -def make_client(url : URI, region = nil, &block) - client = make_client(url, region) - begin - yield client - ensure - client.close - end -end - def decode_length_seconds(string) length_seconds = string.gsub(/[^0-9:]/, "").split(":").map &.to_i length_seconds = [0] * (3 - length_seconds.size) + length_seconds diff --git a/src/invidious/yt_backend/connection_pool.cr b/src/invidious/yt_backend/connection_pool.cr new file mode 100644 index 00000000..505f2cf6 --- /dev/null +++ b/src/invidious/yt_backend/connection_pool.cr @@ -0,0 +1,81 @@ +require "lsquic" + +struct YoutubeConnectionPool + property! url : URI + property! capacity : Int32 + property! timeout : Float64 + property pool : DB::Pool(QUIC::Client | HTTP::Client) + + def initialize(url : URI, @capacity = 5, @timeout = 5.0, use_quic = true) + @url = url + @pool = build_pool(use_quic) + end + + def client(region = nil, &block) + if region + conn = make_client(url, region) + response = yield conn + else + conn = pool.checkout + begin + response = yield conn + rescue ex + conn.close + conn = QUIC::Client.new(url) + conn.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::INET + conn.family = Socket::Family::INET if conn.family == Socket::Family::UNSPEC + conn.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com" + response = yield conn + ensure + pool.release(conn) + end + end + + response + end + + private def build_pool(use_quic) + DB::Pool(QUIC::Client | HTTP::Client).new(initial_pool_size: 0, max_pool_size: capacity, max_idle_pool_size: capacity, checkout_timeout: timeout) do + if use_quic + conn = QUIC::Client.new(url) + else + conn = HTTP::Client.new(url) + end + conn.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::INET + conn.family = Socket::Family::INET if conn.family == Socket::Family::UNSPEC + conn.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com" + conn + end + end +end + +def make_client(url : URI, region = nil) + # TODO: Migrate any applicable endpoints to QUIC + client = HTTPClient.new(url, OpenSSL::SSL::Context::Client.insecure) + client.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::UNSPEC + client.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com" + client.read_timeout = 10.seconds + client.connect_timeout = 10.seconds + + if region + PROXY_LIST[region]?.try &.sample(40).each do |proxy| + begin + proxy = HTTPProxy.new(proxy_host: proxy[:ip], proxy_port: proxy[:port]) + client.set_proxy(proxy) + break + rescue ex + end + end + end + + return client +end + +def make_client(url : URI, region = nil, &block) + client = make_client(url, region) + begin + yield client + ensure + client.close + end +end From f7f09109531979de6e8bc7789b56b3e69b818b6b Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Thu, 7 Oct 2021 22:21:40 +0200 Subject: [PATCH 3/9] Remove fetch_continuation_token(): dead code --- src/invidious/helpers/helpers.cr | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr index 968062d6..baf82740 100644 --- a/src/invidious/helpers/helpers.cr +++ b/src/invidious/helpers/helpers.cr @@ -270,24 +270,6 @@ def extract_selected_tab(tabs) return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]["tabRenderer"] end -def fetch_continuation_token(items : Array(JSON::Any)) - # Fetches the continuation token from an array of items - return items.last["continuationItemRenderer"]? - .try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s -end - -def fetch_continuation_token(initial_data : Hash(String, JSON::Any)) - # Fetches the continuation token from initial data - if initial_data["onResponseReceivedActions"]? - continuation_items = initial_data["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"] - else - tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]) - continuation_items = tab["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]["items"] - end - - return fetch_continuation_token(continuation_items.as_a) -end - def check_enum(db, enum_name, struct_type = nil) return # TODO From 7df2fd0bc8f5174ab0c428165ec0a4202dcf1fd5 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Thu, 7 Oct 2021 22:32:04 +0200 Subject: [PATCH 4/9] Add 'require' statement to 'invidious.cr' --- src/invidious.cr | 1 + 1 file changed, 1 insertion(+) diff --git a/src/invidious.cr b/src/invidious.cr index 18ec0b97..3a20b0d8 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -27,6 +27,7 @@ require "yaml" require "compress/zip" require "protodec/utils" require "./invidious/helpers/*" +require "./invidious/yt_backend/*" require "./invidious/*" require "./invidious/channels/*" require "./invidious/routes/**" From 8805ee7c8c5d1023c032b52cc79b1c1048b60afd Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Mon, 11 Oct 2021 18:55:15 +0200 Subject: [PATCH 5/9] Add fetch_continuation_token back (required by #2215) --- src/invidious/yt_backend/extractors_utils.cr | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 src/invidious/yt_backend/extractors_utils.cr diff --git a/src/invidious/yt_backend/extractors_utils.cr b/src/invidious/yt_backend/extractors_utils.cr new file mode 100644 index 00000000..e0a13031 --- /dev/null +++ b/src/invidious/yt_backend/extractors_utils.cr @@ -0,0 +1,17 @@ +def fetch_continuation_token(items : Array(JSON::Any)) + # Fetches the continuation token from an array of items + return items.last["continuationItemRenderer"]? + .try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s +end + +def fetch_continuation_token(initial_data : Hash(String, JSON::Any)) + # Fetches the continuation token from initial data + if initial_data["onResponseReceivedActions"]? + continuation_items = initial_data["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"] + else + tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]) + continuation_items = tab["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]["items"] + end + + return fetch_continuation_token(continuation_items.as_a) +end From e17c8b1f4deaa56dcdd5d3b9f62bec13f9b71dc7 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Mon, 11 Oct 2021 18:58:12 +0200 Subject: [PATCH 6/9] Move 'extract_videos' and 'extract_selected_tab' too --- src/invidious/helpers/helpers.cr | 19 ------------------- src/invidious/yt_backend/extractors_utils.cr | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr index baf82740..c01ca11e 100644 --- a/src/invidious/helpers/helpers.cr +++ b/src/invidious/helpers/helpers.cr @@ -251,25 +251,6 @@ def html_to_content(description_html : String) return description end -def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) - extracted = extract_items(initial_data, author_fallback, author_id_fallback) - - target = [] of SearchItem - extracted.each do |i| - if i.is_a?(Category) - i.contents.each { |cate_i| target << cate_i if !cate_i.is_a? Video } - else - target << i - end - end - return target.select(&.is_a?(SearchVideo)).map(&.as(SearchVideo)) -end - -def extract_selected_tab(tabs) - # Extract the selected tab from the array of tabs Youtube returns - return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]["tabRenderer"] -end - def check_enum(db, enum_name, struct_type = nil) return # TODO diff --git a/src/invidious/yt_backend/extractors_utils.cr b/src/invidious/yt_backend/extractors_utils.cr index e0a13031..b76fa09a 100644 --- a/src/invidious/yt_backend/extractors_utils.cr +++ b/src/invidious/yt_backend/extractors_utils.cr @@ -1,3 +1,22 @@ +def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) + extracted = extract_items(initial_data, author_fallback, author_id_fallback) + + target = [] of SearchItem + extracted.each do |i| + if i.is_a?(Category) + i.contents.each { |cate_i| target << cate_i if !cate_i.is_a? Video } + else + target << i + end + end + return target.select(&.is_a?(SearchVideo)).map(&.as(SearchVideo)) +end + +def extract_selected_tab(tabs) + # Extract the selected tab from the array of tabs Youtube returns + return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]["tabRenderer"] +end + def fetch_continuation_token(items : Array(JSON::Any)) # Fetches the continuation token from an array of items return items.last["continuationItemRenderer"]? From 2571e420f3ebc93dd518ea7d97f03aeb6a00b2b8 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Tue, 12 Oct 2021 12:21:47 +0200 Subject: [PATCH 7/9] Move 'add_yt_headers()' to 'connection_pool.cr' --- src/invidious/helpers/utils.cr | 15 --------------- src/invidious/yt_backend/connection_pool.cr | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr index 6100d403..65067526 100644 --- a/src/invidious/helpers/utils.cr +++ b/src/invidious/helpers/utils.cr @@ -1,20 +1,5 @@ require "db" -def add_yt_headers(request) - request.headers["user-agent"] ||= "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36" - request.headers["accept-charset"] ||= "ISO-8859-1,utf-8;q=0.7,*;q=0.7" - request.headers["accept"] ||= "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" - request.headers["accept-language"] ||= "en-us,en;q=0.5" - return if request.resource.starts_with? "/sorry/index" - request.headers["x-youtube-client-name"] ||= "1" - request.headers["x-youtube-client-version"] ||= "2.20200609" - # Preserve original cookies and add new YT consent cookie for EU servers - request.headers["cookie"] = "#{request.headers["cookie"]?}; CONSENT=YES+" - if !CONFIG.cookies.empty? - request.headers["cookie"] = "#{(CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }).join("; ")}; #{request.headers["cookie"]?}" - end -end - # See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html def ci_lower_bound(pos, n) if n == 0 diff --git a/src/invidious/yt_backend/connection_pool.cr b/src/invidious/yt_backend/connection_pool.cr index 505f2cf6..5ba2d73c 100644 --- a/src/invidious/yt_backend/connection_pool.cr +++ b/src/invidious/yt_backend/connection_pool.cr @@ -1,5 +1,20 @@ require "lsquic" +def add_yt_headers(request) + request.headers["user-agent"] ||= "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36" + request.headers["accept-charset"] ||= "ISO-8859-1,utf-8;q=0.7,*;q=0.7" + request.headers["accept"] ||= "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" + request.headers["accept-language"] ||= "en-us,en;q=0.5" + return if request.resource.starts_with? "/sorry/index" + request.headers["x-youtube-client-name"] ||= "1" + request.headers["x-youtube-client-version"] ||= "2.20200609" + # Preserve original cookies and add new YT consent cookie for EU servers + request.headers["cookie"] = "#{request.headers["cookie"]?}; CONSENT=YES+" + if !CONFIG.cookies.empty? + request.headers["cookie"] = "#{(CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }).join("; ")}; #{request.headers["cookie"]?}" + end +end + struct YoutubeConnectionPool property! url : URI property! capacity : Int32 From 33d9be0ffb6bad28eb2f624aec4a7ba8f9a1795c Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Mon, 18 Oct 2021 16:12:49 +0200 Subject: [PATCH 8/9] Move 'extractors.cr' to 'yt_backend' folder --- src/invidious/{helpers => yt_backend}/extractors.cr | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/invidious/{helpers => yt_backend}/extractors.cr (100%) diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/yt_backend/extractors.cr similarity index 100% rename from src/invidious/helpers/extractors.cr rename to src/invidious/yt_backend/extractors.cr From cb9b84f940a3cb05a35790f73c055d6a5a62ec4f Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Mon, 18 Oct 2021 16:14:37 +0200 Subject: [PATCH 9/9] Move 'extract_text()' to 'extractors_utils.cr' --- src/invidious/yt_backend/extractors.cr | 31 -------------------- src/invidious/yt_backend/extractors_utils.cr | 31 ++++++++++++++++++++ 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr index 0277d43b..8398ca8e 100644 --- a/src/invidious/yt_backend/extractors.cr +++ b/src/invidious/yt_backend/extractors.cr @@ -533,37 +533,6 @@ private module HelperExtractors end end -# Extracts text from InnerTube response -# -# InnerTube can package text in three different formats -# "runs": [ -# {"text": "something"}, -# {"text": "cont"}, -# ... -# ] -# -# "SimpleText": "something" -# -# Or sometimes just none at all as with the data returned from -# category continuations. -# -# In order to facilitate calling this function with `#[]?`: -# A nil will be accepted. Of course, since nil cannot be parsed, -# another nil will be returned. -def extract_text(item : JSON::Any?) : String? - if item.nil? - return nil - end - - if text_container = item["simpleText"]? - return text_container.as_s - elsif text_container = item["runs"]? - return text_container.as_a.map(&.["text"].as_s).join("") - else - nil - end -end - # Parses an item from Youtube's JSON response into a more usable structure. # The end result can either be a SearchVideo, SearchPlaylist or SearchChannel. def extract_item(item : JSON::Any, author_fallback : String? = "", diff --git a/src/invidious/yt_backend/extractors_utils.cr b/src/invidious/yt_backend/extractors_utils.cr index b76fa09a..97cc0997 100644 --- a/src/invidious/yt_backend/extractors_utils.cr +++ b/src/invidious/yt_backend/extractors_utils.cr @@ -1,3 +1,34 @@ +# Extracts text from InnerTube response +# +# InnerTube can package text in three different formats +# "runs": [ +# {"text": "something"}, +# {"text": "cont"}, +# ... +# ] +# +# "SimpleText": "something" +# +# Or sometimes just none at all as with the data returned from +# category continuations. +# +# In order to facilitate calling this function with `#[]?`: +# A nil will be accepted. Of course, since nil cannot be parsed, +# another nil will be returned. +def extract_text(item : JSON::Any?) : String? + if item.nil? + return nil + end + + if text_container = item["simpleText"]? + return text_container.as_s + elsif text_container = item["runs"]? + return text_container.as_a.map(&.["text"].as_s).join("") + else + nil + end +end + def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) extracted = extract_items(initial_data, author_fallback, author_id_fallback)