From 90d661e4a7fa6c07f8eea9d4af1a9467dfc09826 Mon Sep 17 00:00:00 2001 From: Omar Roth Date: Sat, 4 Aug 2018 15:30:44 -0500 Subject: [PATCH] Split helpers.cr into multiple files --- src/invidious.cr | 2906 ++++++++++++++---------------- src/invidious/channels.cr | 132 ++ src/invidious/comments.cr | 247 +++ src/invidious/helpers.cr | 1272 ------------- src/invidious/helpers/helpers.cr | 273 +++ src/invidious/helpers/macros.cr | 18 + src/invidious/helpers/utils.cr | 129 ++ src/invidious/jobs.cr | 136 ++ src/invidious/search.cr | 30 + src/invidious/signatures.cr | 65 + src/invidious/users.cr | 146 ++ src/invidious/videos.cr | 223 +++ 12 files changed, 2798 insertions(+), 2779 deletions(-) create mode 100644 src/invidious/channels.cr create mode 100644 src/invidious/comments.cr delete mode 100644 src/invidious/helpers.cr create mode 100644 src/invidious/helpers/helpers.cr create mode 100644 src/invidious/helpers/macros.cr create mode 100644 src/invidious/helpers/utils.cr create mode 100644 src/invidious/jobs.cr create mode 100644 src/invidious/search.cr create mode 100644 src/invidious/signatures.cr create mode 100644 src/invidious/users.cr create mode 100644 src/invidious/videos.cr diff --git a/src/invidious.cr b/src/invidious.cr index 870bddcc..e82f0b9f 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -23,6 +23,7 @@ require "pg" require "xml" require "yaml" require "zip" +require "./invidious/helpers/*" require "./invidious/*" CONFIG = Config.from_yaml(File.read("config/config.yml")) @@ -78,153 +79,33 @@ LOGIN_URL = URI.parse("https://accounts.google.com") crawl_threads.times do spawn do - ids = Deque(String).new - random = Random.new - - search(random.base64(3)).each do |video| - ids << video.id - end - - loop do - client = make_client(YT_URL) - if ids.empty? 
- search(random.base64(3)).each do |video| - ids << video.id - end - end - - begin - id = ids[0] - video = get_video(id, PG_DB) - rescue ex - STDOUT << id << " : " << ex.message << "\n" - next - ensure - ids.delete(id) - end - - rvs = [] of Hash(String, String) - if video.info.has_key?("rvs") - video.info["rvs"].split(",").each do |rv| - rvs << HTTP::Params.parse(rv).to_h - end - end - - rvs.each do |rv| - if rv.has_key?("id") && !PG_DB.query_one?("SELECT EXISTS (SELECT true FROM videos WHERE id = $1)", rv["id"], as: Bool) - ids.delete(id) - ids << rv["id"] - if ids.size == 150 - ids.shift - end - end - end - - Fiber.yield - end + crawl_videos(PG_DB) end end channel_threads.times do |i| spawn do - loop do - query = "SELECT id FROM channels ORDER BY updated \ - LIMIT (SELECT count(*)/$2 FROM channels) \ - OFFSET (SELECT count(*)*$1/$2 FROM channels)" - PG_DB.query(query, i, channel_threads) do |rs| - rs.each do - client = make_client(YT_URL) - - begin - id = rs.read(String) - channel = fetch_channel(id, client, PG_DB, false) - PG_DB.exec("UPDATE channels SET updated = $1 WHERE id = $2", Time.now, id) - rescue ex - STDOUT << id << " : " << ex.message << "\n" - next - end - end - end - - Fiber.yield - end + refresh_channels(PG_DB) end end video_threads.times do |i| spawn do - loop do - query = "SELECT id FROM videos ORDER BY updated \ - LIMIT (SELECT count(*)/$2 FROM videos) \ - OFFSET (SELECT count(*)*$1/$2 FROM videos)" - PG_DB.query(query, i, video_threads) do |rs| - rs.each do - begin - id = rs.read(String) - video = get_video(id, PG_DB) - rescue ex - STDOUT << id << " : " << ex.message << "\n" - next - end - end - end - - Fiber.yield - end + refresh_videos(PG_DB) end end top_videos = [] of Video spawn do - if CONFIG.dl_api_key - DetectLanguage.configure do |config| - config.api_key = CONFIG.dl_api_key.not_nil! 
- end - filter = true - end - - filter ||= false - - loop do - begin - top = rank_videos(PG_DB, 40, filter, YT_URL) - rescue ex - next - end - - if top.size > 0 - args = arg_array(top) - else - next - end - - videos = [] of Video - - top.each do |id| - begin - videos << get_video(id, PG_DB) - rescue ex - next - end - end - + pull_top_videos(CONFIG, PG_DB) do |videos| top_videos = videos - Fiber.yield end end -# Refresh decrypt function decrypt_function = [] of {name: String, value: Int32} spawn do - loop do - client = make_client(YT_URL) - - begin - decrypt_function = update_decrypt_function(client) - rescue ex - end - - Fiber.yield + update_decrypt_function do |function| + decrypt_function = function end end @@ -266,6 +147,29 @@ get "/" do |env| templated "index" end +# Videos + +get "/:id" do |env| + id = env.params.url["id"] + + if md = id.match(/[a-zA-Z0-9_-]{11}/) + params = [] of String + env.params.query.each do |k, v| + params << "#{k}=#{v}" + end + params = params.join("&") + + url = "/watch?v=#{id}" + if !params.empty? + url += "&#{params}" + end + + env.redirect url + else + env.response.status_code = 404 + end +end + get "/watch" do |env| if env.params.query["v"]? id = env.params.query["v"] @@ -448,6 +352,1369 @@ get "/watch" do |env| templated "watch" end +get "/embed/:id" do |env| + if env.params.url["id"]? + id = env.params.url["id"] + else + next env.redirect "/" + end + + if env.params.query["start"]? + video_start = decode_time(env.params.query["start"]) + end + + if env.params.query["t"]? + video_start = decode_time(env.params.query["t"]) + end + video_start ||= 0 + + if env.params.query["end"]? + video_end = decode_time(env.params.query["end"]) + end + video_end ||= -1 + + if env.params.query["listen"]? && env.params.query["listen"] == "true" + listen = true + env.params.query.delete_all("listen") + end + listen ||= false + + raw = env.params.query["raw"]?.try &.to_i? + raw ||= 0 + raw = raw == 1 + + quality = env.params.query["quality"]? 
+ quality ||= "hd720" + + autoplay = env.params.query["autoplay"]?.try &.to_i? + autoplay ||= 0 + autoplay = autoplay == 1 + + controls = env.params.query["controls"]?.try &.to_i? + controls ||= 1 + controls = controls == 1 + + video_loop = env.params.query["loop"]?.try &.to_i? + video_loop ||= 0 + video_loop = video_loop == 1 + + begin + video = get_video(id, PG_DB) + rescue ex + error_message = ex.message + next templated "error" + end + + player_response = JSON.parse(video.info["player_response"]) + if player_response["captions"]? + captions = player_response["captions"]["playerCaptionsTracklistRenderer"]["captionTracks"]?.try &.as_a + end + captions ||= [] of JSON::Any + + if video.info["hlsvp"]? + hlsvp = video.info["hlsvp"] + + if Kemal.config.ssl || CONFIG.https_only + scheme = "https://" + else + scheme = "http://" + end + host = env.request.headers["Host"] + url = "#{scheme}#{host}" + + hlsvp = hlsvp.gsub("https://manifest.googlevideo.com", url) + end + + fmt_stream = [] of HTTP::Params + video.info["url_encoded_fmt_stream_map"].split(",") do |string| + if !string.empty? + fmt_stream << HTTP::Params.parse(string) + end + end + + fmt_stream.each { |s| s.add("label", "#{s["quality"]} - #{s["type"].split(";")[0].split("/")[1]}") } + fmt_stream = fmt_stream.uniq { |s| s["label"] } + + adaptive_fmts = [] of HTTP::Params + if video.info.has_key?("adaptive_fmts") + video.info["adaptive_fmts"].split(",") do |string| + adaptive_fmts << HTTP::Params.parse(string) + end + end + + if adaptive_fmts[0]? && adaptive_fmts[0]["s"]? + adaptive_fmts.each do |fmt| + fmt["url"] += "&signature=" + decrypt_signature(fmt["s"], decrypt_function) + end + + fmt_stream.each do |fmt| + fmt["url"] += "&signature=" + decrypt_signature(fmt["s"], decrypt_function) + end + end + + audio_streams = adaptive_fmts.compact_map { |s| s["type"].starts_with?("audio") ? s : nil } + audio_streams.sort_by! { |s| s["bitrate"].to_i }.reverse! 
+ audio_streams.each do |stream| + stream["bitrate"] = (stream["bitrate"].to_f64/1000).to_i.to_s + end + + if raw + url = fmt_stream[0]["url"] + + fmt_stream.each do |fmt| + if fmt["label"].split(" - ")[0] == quality + url = fmt["url"] + end + end + + next env.redirect url + end + + video.description = fill_links(video.description, "https", "www.youtube.com") + video.description = add_alt_links(video.description) + + description = video.description.gsub("
", " ") + description = description.gsub("
", " ") + description = XML.parse_html(description).content[0..200].gsub('"', """).gsub("\n", " ").strip(" ") + if description.empty? + description = " " + end + + if Kemal.config.ssl || CONFIG.https_only + scheme = "https://" + else + scheme = "http://" + end + host = env.request.headers["Host"] + host_url = "#{scheme}#{host}" + host_params = env.request.query_params + host_params.delete_all("v") + + if fmt_stream.select { |x| x["label"].starts_with? "hd720" }.size != 0 + thumbnail = "https://i.ytimg.com/vi/#{video.id}/maxresdefault.jpg" + else + thumbnail = "https://i.ytimg.com/vi/#{video.id}/hqdefault.jpg" + end + + rendered "embed" +end + +# Search + +get "/results" do |env| + search_query = env.params.query["search_query"]? + if search_query + env.redirect "/search?q=#{URI.escape(search_query)}" + else + env.redirect "/" + end +end + +get "/search" do |env| + if env.params.query["q"]? + query = env.params.query["q"] + else + next env.redirect "/" + end + + page = env.params.query["page"]?.try &.to_i? + page ||= 1 + + videos = search(query, page) + + templated "search" +end + +# Users + +get "/login" do |env| + user = env.get? "user" + if user + next env.redirect "/feed/subscriptions" + end + + referer = env.request.headers["referer"]? + referer ||= "/feed/subscriptions" + + account_type = env.params.query["type"]? + account_type ||= "invidious" + + if account_type == "invidious" + captcha = generate_captcha(HMAC_KEY) + end + + tfa = env.params.query["tfa"]? + tfa ||= false + + if referer.ends_with? "/login" + referer = "/feed/subscriptions" + end + + if referer.size > 64 + referer = "/feed/subscriptions" + end + + templated "login" +end + +# See https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/youtube.py#L79 +post "/login" do |env| + referer = env.params.query["referer"]? + referer ||= "/feed/subscriptions" + + email = env.params.body["email"]? + password = env.params.body["password"]? + + account_type = env.params.query["type"]? 
+ account_type ||= "google" + + if account_type == "google" + tfa_code = env.params.body["tfa"]?.try &.lchop("G-") + + begin + client = make_client(LOGIN_URL) + headers = HTTP::Headers.new + headers["Content-Type"] = "application/x-www-form-urlencoded;charset=utf-8" + headers["Google-Accounts-XSRF"] = "1" + + login_page = client.get("/ServiceLogin") + headers = login_page.cookies.add_request_headers(headers) + + login_page = XML.parse_html(login_page.body) + + inputs = {} of String => String + login_page.xpath_nodes(%q(//input[@type="submit"])).each do |node| + name = node["id"]? || node["name"]? + name ||= "" + value = node["value"]? + value ||= "" + + if name != "" && value != "" + inputs[name] = value + end + end + + login_page.xpath_nodes(%q(//input[@type="hidden"])).each do |node| + name = node["id"]? || node["name"]? + name ||= "" + value = node["value"]? + value ||= "" + + if name != "" && value != "" + inputs[name] = value + end + end + + lookup_req = %(["#{email}",null,[],null,"US",null,null,2,false,true,[null,null,[2,1,null,1,"https://accounts.google.com/ServiceLogin?passive=1209600&continue=https%3A%2F%2Faccounts.google.com%2FManageAccount&followup=https%3A%2F%2Faccounts.google.com%2FManageAccount",null,[],4,[]],1,[null,null,[]],null,null,null,true],"#{email}"]) + + lookup_results = client.post("/_/signin/sl/lookup", headers, login_req(inputs, lookup_req)) + headers = lookup_results.cookies.add_request_headers(headers) + + lookup_results = lookup_results.body + lookup_results = lookup_results[5..-1] + lookup_results = JSON.parse(lookup_results) + + user_hash = lookup_results[0][2] + + challenge_req = %(["#{user_hash}",null,1,null,[1,null,null,null,["#{password}",null,true]],[null,null,[2,1,null,1,"https://accounts.google.com/ServiceLogin?passive=1209600&continue=https%3A%2F%2Faccounts.google.com%2FManageAccount&followup=https%3A%2F%2Faccounts.google.com%2FManageAccount",null,[],4,[]],1,[null,null,[]],null,null,null,true]]) + + challenge_results = 
client.post("/_/signin/sl/challenge", headers, login_req(inputs, challenge_req)) + headers = challenge_results.cookies.add_request_headers(headers) + + challenge_results = challenge_results.body + challenge_results = challenge_results[5..-1] + challenge_results = JSON.parse(challenge_results) + + headers["Cookie"] = URI.unescape(headers["Cookie"]) + + if challenge_results[0][-1]?.try &.[5] == "INCORRECT_ANSWER_ENTERED" + error_message = "Incorrect password" + next templated "error" + end + + if challenge_results[0][-1][0].as_a? + # Prefer Authenticator app and SMS over unsupported protocols + if challenge_results[0][-1][0][0][8] != 6 || challenge_results[0][-1][0][0][8] != 9 + tfa = challenge_results[0][-1][0].as_a.select { |auth_type| auth_type[8] == 6 || auth_type[8] == 9 }[0] + select_challenge = "[#{challenge_results[0][-1][0].as_a.index(tfa).not_nil!}]" + + tl = challenge_results[1][2] + + tfa = client.post("/_/signin/selectchallenge?TL=#{tl}", headers, login_req(inputs, select_challenge)).body + tfa = tfa[5..-1] + tfa = JSON.parse(tfa)[0][-1] + else + tfa = challenge_results[0][-1][0][0] + end + + if tfa[2] == "TWO_STEP_VERIFICATION" + if tfa[5] == "QUOTA_EXCEEDED" + error_message = "Quota exceeded, try again in a few hours" + next templated "error" + end + + if !tfa_code + next env.redirect "/login?tfa=true&type=google" + end + + tl = challenge_results[1][2] + + request_type = tfa[8] + case request_type + when 6 + # Authenticator app + tfa_req = %(["#{user_hash}",null,2,null,[6,null,null,null,null,["#{tfa_code}",false]]]) + when 9 + # Voice or text message + tfa_req = %(["#{user_hash}",null,2,null,[9,null,null,null,null,null,null,null,[null,"#{tfa_code}",false,2]]]) + else + error_message = "Unable to login, make sure two-factor authentication (Authenticator or SMS) is enabled." 
+ next templated "error" + end + + challenge_results = client.post("/_/signin/challenge?hl=en&TL=#{tl}", headers, login_req(inputs, tfa_req)) + headers = challenge_results.cookies.add_request_headers(headers) + + challenge_results = challenge_results.body + challenge_results = challenge_results[5..-1] + challenge_results = JSON.parse(challenge_results) + + if challenge_results[0][-1]?.try &.[5] == "INCORRECT_ANSWER_ENTERED" + error_message = "Invalid TFA code" + next templated "error" + end + end + end + + login_res = challenge_results[0][13][2].to_s + + login = client.get(login_res, headers) + headers = login.cookies.add_request_headers(headers) + + login = client.get(login.headers["Location"], headers) + + headers = HTTP::Headers.new + headers = login.cookies.add_request_headers(headers) + + sid = login.cookies["SID"].value + + client = make_client(YT_URL) + user = get_user(sid, client, headers, PG_DB) + + # We are now logged in + + host = URI.parse(env.request.headers["Host"]).host + + login.cookies.each do |cookie| + if Kemal.config.ssl || CONFIG.https_only + cookie.secure = true + else + cookie.secure = false + end + + cookie.extension = cookie.extension.not_nil!.gsub(".youtube.com", host) + cookie.extension = cookie.extension.not_nil!.gsub("Secure; ", "") + end + + login.cookies.add_response_headers(env.response.headers) + + env.redirect referer + rescue ex + error_message = "Login failed. This may be because two-factor authentication is not enabled on your account." + next templated "error" + end + elsif account_type == "invidious" + challenge_response = env.params.body["challenge_response"]? + token = env.params.body["token"]? + + action = env.params.body["action"]? 
+ action ||= "signin" + + if !email + error_message = "User ID is a required field" + next templated "error" + end + + if !password + error_message = "Password is a required field" + next templated "error" + end + + if !challenge_response || !token + error_message = "CAPTCHA is a required field" + next templated "error" + end + + challenge_response = challenge_response.lstrip('0') + if OpenSSL::HMAC.digest(:sha256, HMAC_KEY, challenge_response) == Base64.decode(token) + else + error_message = "Invalid CAPTCHA response" + next templated "error" + end + + if action == "signin" + user = PG_DB.query_one?("SELECT * FROM users WHERE email = $1 AND password IS NOT NULL", email, as: User) + + if !user + error_message = "Invalid username or password" + next templated "error" + end + + if !user.password + error_message = "Please sign in using 'Sign in with Google'" + next templated "error" + end + + if Crypto::Bcrypt::Password.new(user.password.not_nil!) == password + sid = Base64.encode(Random::Secure.random_bytes(50)) + PG_DB.exec("UPDATE users SET id = $1 WHERE email = $2", sid, email) + + if Kemal.config.ssl || CONFIG.https_only + secure = true + else + secure = false + end + + env.response.cookies["SID"] = HTTP::Cookie.new(name: "SID", value: sid, expires: Time.now + 2.years, + secure: secure, http_only: true) + else + error_message = "Invalid username or password" + next templated "error" + end + elsif action == "register" + user = PG_DB.query_one?("SELECT * FROM users WHERE email = $1 AND password IS NOT NULL", email, as: User) + if user + error_message = "Please sign in" + next templated "error" + end + + sid = Base64.encode(Random::Secure.random_bytes(50)) + user = create_user(sid, email, password) + user_array = user.to_a + + user_array[5] = user_array[5].to_json + args = arg_array(user_array) + + PG_DB.exec("INSERT INTO users VALUES (#{args})", user_array) + + if Kemal.config.ssl || CONFIG.https_only + secure = true + else + secure = false + end + + 
env.response.cookies["SID"] = HTTP::Cookie.new(name: "SID", value: sid, expires: Time.now + 2.years, + secure: secure, http_only: true) + end + + env.redirect referer + end +end + +get "/signout" do |env| + referer = env.request.headers["referer"]? + referer ||= "/" + + env.request.cookies.each do |cookie| + cookie.expires = Time.new(1990, 1, 1) + end + + env.request.cookies.add_response_headers(env.response.headers) + env.redirect referer +end + +get "/preferences" do |env| + user = env.get? "user" + + referer = env.request.headers["referer"]? + referer ||= "/preferences" + + if referer.size > 64 + referer = "/preferences" + end + + if user + user = user.as(User) + templated "preferences" + else + env.redirect referer + end +end + +post "/preferences" do |env| + user = env.get? "user" + + referer = env.params.query["referer"]? + referer ||= "/preferences" + + if user + user = user.as(User) + + video_loop = env.params.body["video_loop"]?.try &.as(String) + video_loop ||= "off" + video_loop = video_loop == "on" + + autoplay = env.params.body["autoplay"]?.try &.as(String) + autoplay ||= "off" + autoplay = autoplay == "on" + + speed = env.params.body["speed"]?.try &.as(String).to_f? + speed ||= 1.0 + + quality = env.params.body["quality"]?.try &.as(String) + quality ||= "hd720" + + volume = env.params.body["volume"]?.try &.as(String).to_i? + volume ||= 100 + + comments = env.params.body["comments"]? + comments ||= "youtube" + + redirect_feed = env.params.body["redirect_feed"]?.try &.as(String) + redirect_feed ||= "off" + redirect_feed = redirect_feed == "on" + + dark_mode = env.params.body["dark_mode"]?.try &.as(String) + dark_mode ||= "off" + dark_mode = dark_mode == "on" + + thin_mode = env.params.body["thin_mode"]?.try &.as(String) + thin_mode ||= "off" + thin_mode = thin_mode == "on" + + max_results = env.params.body["max_results"]?.try &.as(String).to_i? 
+ max_results ||= 40 + + sort = env.params.body["sort"]?.try &.as(String) + sort ||= "published" + + latest_only = env.params.body["latest_only"]?.try &.as(String) + latest_only ||= "off" + latest_only = latest_only == "on" + + unseen_only = env.params.body["unseen_only"]?.try &.as(String) + unseen_only ||= "off" + unseen_only = unseen_only == "on" + + notifications_only = env.params.body["notifications_only"]?.try &.as(String) + notifications_only ||= "off" + notifications_only = notifications_only == "on" + + preferences = { + "video_loop" => video_loop, + "autoplay" => autoplay, + "speed" => speed, + "quality" => quality, + "volume" => volume, + "comments" => comments, + "redirect_feed" => redirect_feed, + "dark_mode" => dark_mode, + "thin_mode" => thin_mode, + "max_results" => max_results, + "sort" => sort, + "latest_only" => latest_only, + "unseen_only" => unseen_only, + "notifications_only" => notifications_only, + }.to_json + + PG_DB.exec("UPDATE users SET preferences = $1 WHERE email = $2", preferences, user.email) + end + + env.redirect referer +end + +# Function that is useful if you have multiple channels that don't have +# the bell dinged. Request parameters are fairly self-explanatory, +# receive_all_updates = true and receive_post_updates = true will ding all +# channels. Calling /modify_notifications without any arguments will +# request all notifications from all channels. +# /modify_notifications?receive_all_updates=false&receive_no_updates=false +# will "unding" all subscriptions. +get "/modify_notifications" do |env| + user = env.get? "user" + + referer = env.request.headers["referer"]? + referer ||= "/" + + if user + user = user.as(User) + + channel_req = {} of String => String + + channel_req["receive_all_updates"] = env.params.query["receive_all_updates"]? || "true" + channel_req["receive_no_updates"] = env.params.query["receive_no_updates"]? || "" + channel_req["receive_post_updates"] = env.params.query["receive_post_updates"]? 
|| "true" + + channel_req.reject! { |k, v| v != "true" && v != "false" } + + headers = HTTP::Headers.new + headers["Cookie"] = env.request.headers["Cookie"] + + client = make_client(YT_URL) + subs = client.get("/subscription_manager?disable_polymer=1", headers) + headers["Cookie"] += "; " + subs.cookies.add_request_headers(headers)["Cookie"] + match = subs.body.match(/'XSRF_TOKEN': "(?[A-Za-z0-9\_\-\=]+)"/) + if match + session_token = match["session_token"] + else + next env.redirect referer + end + + channel_req["session_token"] = session_token + + headers["content-type"] = "application/x-www-form-urlencoded" + subs = XML.parse_html(subs.body) + subs.xpath_nodes(%q(//a[@class="subscription-title yt-uix-sessionlink"]/@href)).each do |channel| + channel_id = channel.content.lstrip("/channel/").not_nil! + + channel_req["channel_id"] = channel_id + + client.post("/subscription_ajax?action_update_subscription_preferences=1", headers, + HTTP::Params.encode(channel_req)).body + end + end + + env.redirect referer +end + +get "/subscription_manager" do |env| + user = env.get? "user" + + if !user + next env.redirect "/" + end + + user = user.as(User) + + if !user.password + # Refresh account + headers = HTTP::Headers.new + headers["Cookie"] = env.request.headers["Cookie"] + + client = make_client(YT_URL) + user = get_user(user.id, client, headers, PG_DB) + end + + action_takeout = env.params.query["action_takeout"]?.try &.to_i? + action_takeout ||= 0 + action_takeout = action_takeout == 1 + + format = env.params.query["format"]? + format ||= "rss" + + client = make_client(YT_URL) + + subscriptions = [] of InvidiousChannel + user.subscriptions.each do |ucid| + begin + subscriptions << get_channel(ucid, client, PG_DB, false) + rescue ex + next + end + end + subscriptions.sort_by! 
{ |channel| channel.author.downcase } + + if action_takeout + if Kemal.config.ssl || CONFIG.https_only + scheme = "https://" + else + scheme = "http://" + end + host = env.request.headers["Host"] + + url = "#{scheme}#{host}" + + if format == "json" + env.response.content_type = "application/json" + env.response.headers["content-disposition"] = "attachment" + next { + "subscriptions" => user.subscriptions, + "watch_history" => user.watched, + "preferences" => user.preferences, + }.to_json + else + env.response.content_type = "application/xml" + env.response.headers["content-disposition"] = "attachment" + export = XML.build do |xml| + xml.element("opml", version: "1.1") do + xml.element("body") do + if format == "newpipe" + title = "YouTube Subscriptions" + else + title = "Invidious Subscriptions" + end + + xml.element("outline", text: title, title: title) do + subscriptions.each do |channel| + if format == "newpipe" + xmlUrl = "https://www.youtube.com/feeds/videos.xml?channel_id=#{channel.id}" + else + xmlUrl = "#{url}/feed/channel/#{channel.id}" + end + + xml.element("outline", text: channel.author, title: channel.author, + "type": "rss", xmlUrl: xmlUrl) + end + end + end + end + end + + next export.gsub(%(\n), "") + end + end + + templated "subscription_manager" +end + +get "/data_control" do |env| + user = env.get? "user" + referer = env.request.headers["referer"]? + referer ||= "/" + + if user + user = user.as(User) + + templated "data_control" + else + env.redirect referer + end +end + +post "/data_control" do |env| + user = env.get? "user" + referer = env.request.headers["referer"]? + referer ||= "/" + + if user + user = user.as(User) + + HTTP::FormData.parse(env.request) do |part| + body = part.body.gets_to_end + if body.empty? + next + end + + case part.name + when "import_invidious" + body = JSON.parse(body) + body["subscriptions"].as_a.each do |ucid| + ucid = ucid.as_s + if !user.subscriptions.includes? 
ucid + PG_DB.exec("UPDATE users SET subscriptions = array_append(subscriptions,$1) WHERE id = $2", ucid, user.id) + + begin + client = make_client(YT_URL) + get_channel(ucid, client, PG_DB, false, false) + rescue ex + next + end + end + end + + body["watch_history"].as_a.each do |id| + id = id.as_s + if !user.watched.includes? id + PG_DB.exec("UPDATE users SET watched = array_append(watched,$1) WHERE id = $2", id, user.id) + end + end + + PG_DB.exec("UPDATE users SET preferences = $1 WHERE id = $2", body["preferences"].to_json, user.id) + when "import_youtube" + subscriptions = XML.parse(body) + subscriptions.xpath_nodes(%q(//outline[@type="rss"])).each do |channel| + ucid = channel["xmlUrl"].match(/UC[a-zA-Z0-9_-]{22}/).not_nil![0] + + if !user.subscriptions.includes? ucid + PG_DB.exec("UPDATE users SET subscriptions = array_append(subscriptions,$1) WHERE id = $2", ucid, user.id) + + begin + client = make_client(YT_URL) + get_channel(ucid, client, PG_DB, false, false) + rescue ex + next + end + end + end + when "import_newpipe_subscriptions" + body = JSON.parse(body) + body["subscriptions"].as_a.each do |channel| + ucid = channel["url"].as_s.match(/UC[a-zA-Z0-9_-]{22}/).not_nil![0] + + if !user.subscriptions.includes? ucid + PG_DB.exec("UPDATE users SET subscriptions = array_append(subscriptions,$1) WHERE id = $2", ucid, user.id) + + begin + client = make_client(YT_URL) + get_channel(ucid, client, PG_DB, false, false) + rescue ex + next + end + end + end + when "import_newpipe" + Zip::Reader.open(body) do |file| + file.each_entry do |entry| + if entry.filename == "newpipe.db" + # We do this because the SQLite driver cannot parse a database from an IO + # Currently: channel URLs can **only** be subscriptions, and + # video URLs can **only** be watch history, so this works okay for now. + + db = entry.io.gets_to_end + db.scan(/youtube\.com\/watch\?v\=(?[a-zA-Z0-9_-]{11})/) do |md| + if !user.watched.includes? 
md["id"] + PG_DB.exec("UPDATE users SET watched = array_append(watched,$1) WHERE id = $2", md["id"], user.id) + end + end + + db.scan(/youtube\.com\/channel\/(?[a-zA-Z0-9_-]{22})/) do |md| + ucid = md["ucid"] + if !user.subscriptions.includes? ucid + PG_DB.exec("UPDATE users SET subscriptions = array_append(subscriptions,$1) WHERE id = $2", ucid, user.id) + + begin + client = make_client(YT_URL) + get_channel(ucid, client, PG_DB, false, false) + rescue ex + next + end + end + end + end + end + end + end + end + end + + env.redirect referer +end + +get "/subscription_ajax" do |env| + user = env.get? "user" + referer = env.request.headers["referer"]? + referer ||= "/" + + if user + user = user.as(User) + + if env.params.query["action_create_subscription_to_channel"]? + action = "action_create_subscription_to_channel" + elsif env.params.query["action_remove_subscriptions"]? + action = "action_remove_subscriptions" + else + next env.redirect referer + end + + channel_id = env.params.query["c"]? + channel_id ||= "" + + if !user.password + headers = HTTP::Headers.new + headers["Cookie"] = env.request.headers["Cookie"] + + client = make_client(YT_URL) + subs = client.get("/subscription_manager?disable_polymer=1", headers) + headers["Cookie"] += "; " + subs.cookies.add_request_headers(headers)["Cookie"] + match = subs.body.match(/'XSRF_TOKEN': "(?[A-Za-z0-9\_\-\=]+)"/) + if match + session_token = match["session_token"] + else + next env.redirect "/" + end + + headers["content-type"] = "application/x-www-form-urlencoded" + + post_req = { + "session_token" => session_token, + } + post_req = HTTP::Params.encode(post_req) + post_url = "/subscription_ajax?#{action}=1&c=#{channel_id}" + + # Update user + if client.post(post_url, headers, post_req).status_code == 200 + sid = user.id + + case action + when .starts_with? "action_create" + PG_DB.exec("UPDATE users SET subscriptions = array_append(subscriptions,$1) WHERE id = $2", channel_id, sid) + when .starts_with? 
"action_remove" + PG_DB.exec("UPDATE users SET subscriptions = array_remove(subscriptions,$1) WHERE id = $2", channel_id, sid) + end + end + else + sid = user.id + + case action + when .starts_with? "action_create" + if !user.subscriptions.includes? channel_id + PG_DB.exec("UPDATE users SET subscriptions = array_append(subscriptions,$1) WHERE id = $2", channel_id, sid) + + client = make_client(YT_URL) + get_channel(channel_id, client, PG_DB, false, false) + end + when .starts_with? "action_remove" + PG_DB.exec("UPDATE users SET subscriptions = array_remove(subscriptions,$1) WHERE id = $2", channel_id, sid) + end + end + end + + env.redirect referer +end + +get "/clear_watch_history" do |env| + user = env.get? "user" + referer = env.request.headers["referer"]? + referer ||= "/" + + if user + user = user.as(User) + + PG_DB.exec("UPDATE users SET watched = '{}' WHERE id = $1", user.id) + end + + env.redirect referer +end + +# Feeds + +get "/feed/subscriptions" do |env| + user = env.get? "user" + + if user + user = user.as(User) + preferences = user.preferences + + # Refresh account + headers = HTTP::Headers.new + headers["Cookie"] = env.request.headers["Cookie"] + + if !user.password + client = make_client(YT_URL) + user = get_user(user.id, client, headers, PG_DB) + end + + max_results = preferences.max_results + max_results ||= env.params.query["max_results"]?.try &.to_i? + max_results ||= 40 + + page = env.params.query["page"]?.try &.to_i? + page ||= 1 + + if max_results < 0 + limit = nil + offset = (page - 1) * 1 + else + limit = max_results + offset = (page - 1) * max_results + end + + notifications = PG_DB.query_one("SELECT notifications FROM users WHERE email = $1", user.email, + as: Array(String)) + if preferences.notifications_only && !notifications.empty? 
+ args = arg_array(notifications) + + videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE id IN (#{args}) + ORDER BY published DESC", notifications, as: ChannelVideo) + notifications = [] of ChannelVideo + + videos.sort_by! { |video| video.published }.reverse! + + case preferences.sort + when "alphabetically" + videos.sort_by! { |video| video.title } + when "alphabetically - reverse" + videos.sort_by! { |video| video.title }.reverse! + when "channel name" + videos.sort_by! { |video| video.author } + when "channel name - reverse" + videos.sort_by! { |video| video.author }.reverse! + end + else + if preferences.latest_only + if preferences.unseen_only + ucids = arg_array(user.subscriptions) + if user.watched.empty? + watched = "'{}'" + else + watched = arg_array(user.watched, user.subscriptions.size + 1) + end + + videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \ + ucid IN (#{ucids}) AND id NOT IN (#{watched}) ORDER BY ucid, published DESC", + user.subscriptions + user.watched, as: ChannelVideo) + else + args = arg_array(user.subscriptions) + videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \ + ucid IN (#{args}) ORDER BY ucid, published DESC", user.subscriptions, as: ChannelVideo) + end + + videos.sort_by! { |video| video.published }.reverse! + else + if preferences.unseen_only + ucids = arg_array(user.subscriptions, 3) + if user.watched.empty? 
+ watched = "'{}'" + else + watched = arg_array(user.watched, user.subscriptions.size + 3) + end + + videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{ucids}) \ + AND id NOT IN (#{watched}) ORDER BY published DESC LIMIT $1 OFFSET $2", + [limit, offset] + user.subscriptions + user.watched, as: ChannelVideo) + else + args = arg_array(user.subscriptions, 3) + videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{args}) \ + ORDER BY published DESC LIMIT $1 OFFSET $2", [limit, offset] + user.subscriptions, as: ChannelVideo) + end + end + + case preferences.sort + when "alphabetically" + videos.sort_by! { |video| video.title } + when "alphabetically - reverse" + videos.sort_by! { |video| video.title }.reverse! + when "channel name" + videos.sort_by! { |video| video.author } + when "channel name - reverse" + videos.sort_by! { |video| video.author }.reverse! + end + + # TODO: Add option to disable picking out notifications from regular feed + notifications = PG_DB.query_one("SELECT notifications FROM users WHERE email = $1", user.email, + as: Array(String)) + + notifications = videos.select { |v| notifications.includes? 
v.id } + videos = videos - notifications + end + + if !limit + videos = videos[0..max_results] + end + + PG_DB.exec("UPDATE users SET notifications = $1, updated = $2 WHERE id = $3", [] of String, Time.now, + user.id) + user.notifications = [] of String + env.set "user", user + + templated "subscriptions" + else + env.redirect "/" + end +end + +get "/feed/channel/:ucid" do |env| + ucid = env.params.url["ucid"] + + client = make_client(YT_URL) + if !ucid.match(/UC[a-zA-Z0-9_-]{22}/) + rss = client.get("/feeds/videos.xml?user=#{ucid}").body + rss = XML.parse_html(rss) + + ucid = rss.xpath_node("//feed/channelid") + if ucid + ucid = ucid.content + else + env.response.content_type = "application/json" + next {"error" => "User does not exist"}.to_json + end + end + + url = produce_videos_url(ucid) + response = client.get(url) + + channel = get_channel(ucid, client, PG_DB, pull_all_videos: false) + + json = JSON.parse(response.body) + if !json["content_html"]? || json["content_html"].as_s.empty? + error_message = "This channel does not exist or has no videos." + next templated "error" + end + + content_html = json["content_html"].as_s + if content_html.empty? 
+ halt env, status_code: 403 + end + document = XML.parse_html(content_html) + + if Kemal.config.ssl || CONFIG.https_only + scheme = "https://" + else + scheme = "http://" + end + host = env.request.headers["Host"] + path = env.request.path + + feed = XML.build(indent: " ", encoding: "UTF-8") do |xml| + xml.element("feed", "xmlns:yt": "http://www.youtube.com/xml/schemas/2015", + "xmlns:media": "http://search.yahoo.com/mrss/", xmlns: "http://www.w3.org/2005/Atom") do + xml.element("link", rel: "self", href: "#{scheme}#{host}#{path}") + xml.element("id") { xml.text "yt:channel:#{ucid}" } + xml.element("yt:channelId") { xml.text ucid } + xml.element("title") { xml.text channel.author } + xml.element("link", rel: "alternate", href: "#{scheme}#{host}/channel/#{ucid}") + + xml.element("author") do + xml.element("name") { xml.text channel.author } + xml.element("uri") { xml.text "#{scheme}#{host}/channel/#{ucid}" } + end + + document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])).each do |node| + anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a)).not_nil! + title = anchor.content.strip + video_id = anchor["href"].lchop("/watch?v=") + + view_count = node.xpath_node(%q(.//div[@class="yt-lockup-meta"]/ul/li[2])).not_nil! + view_count = view_count.content.rchop(" views") + if view_count = "No" + view_count = 0 + else + view_count = view_count.delete(",").to_i + end + + descriptionHtml = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")])) + if !descriptionHtml + description = "" + descriptionHtml = "" + else + descriptionHtml = descriptionHtml.to_s + description = descriptionHtml.gsub("
", "\n") + description = description.gsub("
", "\n") + description = XML.parse_html(description).content.strip("\n ") + end + + published = node.xpath_node(%q(.//div[@class="yt-lockup-meta"]/ul/li[1])) + if !published + next + end + published = published.content + published = decode_date(published) + + xml.element("entry") do + xml.element("id") { xml.text "yt:video:#{video_id}" } + xml.element("yt:videoId") { xml.text video_id } + xml.element("yt:channelId") { xml.text ucid } + xml.element("title") { xml.text title } + xml.element("link", rel: "alternate", href: "#{scheme}#{host}/watch?v=#{video_id}") + + xml.element("author") do + xml.element("name") { xml.text channel.author } + xml.element("uri") { xml.text "#{scheme}#{host}/channel/#{ucid}" } + end + + xml.element("published") { xml.text published.to_s("%Y-%m-%dT%H:%M:%S%:z") } + + xml.element("media:group") do + xml.element("media:title") { xml.text title } + xml.element("media:thumbnail", url: "https://i.ytimg.com/vi/#{video_id}/hqdefault.jpg", + width: "480", height: "360") + xml.element("media:description") { xml.text description } + end + + xml.element("media:community") do + xml.element("media:statistics", views: view_count) + end + end + end + end + end + + env.response.content_type = "text/xml" + feed +end + +get "/feed/private" do |env| + token = env.params.query["token"]? + + if !token + halt env, status_code: 403 + end + + user = PG_DB.query_one?("SELECT * FROM users WHERE token = $1", token.strip, as: User) + if !user + halt env, status_code: 403 + end + + max_results = env.params.query["max_results"]?.try &.to_i? + max_results ||= 40 + + page = env.params.query["page"]?.try &.to_i? + page ||= 1 + + if max_results < 0 + limit = nil + offset = (page - 1) * 1 + else + limit = max_results + offset = (page - 1) * max_results + end + + latest_only = env.params.query["latest_only"]?.try &.to_i? 
+ latest_only ||= 0 + latest_only = latest_only == 1 + + if latest_only + args = arg_array(user.subscriptions) + videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \ + ucid IN (#{args}) ORDER BY ucid, published DESC", user.subscriptions, as: ChannelVideo) + videos.sort_by! { |video| video.published }.reverse! + else + args = arg_array(user.subscriptions, 3) + videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{args}) \ + ORDER BY published DESC LIMIT $1 OFFSET $2", [limit, offset] + user.subscriptions, as: ChannelVideo) + end + + sort = env.params.query["sort"]? + sort ||= "published" + + case sort + when "alphabetically" + videos.sort_by! { |video| video.title } + when "reverse_alphabetically" + videos.sort_by! { |video| video.title }.reverse! + when "channel_name" + videos.sort_by! { |video| video.author } + when "reverse_channel_name" + videos.sort_by! { |video| video.author }.reverse! + end + + if Kemal.config.ssl || CONFIG.https_only + scheme = "https://" + else + scheme = "http://" + end + + if !limit + videos = videos[0..max_results] + end + + host = env.request.headers["Host"] + path = env.request.path + query = env.request.query.not_nil! 
+ + feed = XML.build(indent: " ", encoding: "UTF-8") do |xml| + xml.element("feed", xmlns: "http://www.w3.org/2005/Atom", "xmlns:media": "http://search.yahoo.com/mrss/", + "xml:lang": "en-US") do + xml.element("link", "type": "text/html", rel: "alternate", href: "#{scheme}#{host}/feed/subscriptions") + xml.element("link", "type": "application/atom+xml", rel: "self", href: "#{scheme}#{host}#{path}?#{query}") + xml.element("title") { xml.text "Invidious Private Feed for #{user.email}" } + + videos.each do |video| + xml.element("entry") do + xml.element("id") { xml.text "yt:video:#{video.id}" } + xml.element("yt:videoId") { xml.text video.id } + xml.element("yt:channelId") { xml.text video.ucid } + xml.element("title") { xml.text video.title } + xml.element("link", rel: "alternate", href: "#{scheme}#{host}/watch?v=#{video.id}") + + xml.element("author") do + xml.element("name") { xml.text video.author } + xml.element("uri") { xml.text "#{scheme}#{host}/channel/#{video.ucid}" } + end + + xml.element("published") { xml.text video.published.to_s("%Y-%m-%dT%H:%M:%S%:z") } + xml.element("updated") { xml.text video.updated.to_s("%Y-%m-%dT%H:%M:%S%:z") } + + xml.element("media:group") do + xml.element("media:title") { xml.text video.title } + xml.element("media:thumbnail", url: "https://i.ytimg.com/vi/#{video.id}/hqdefault.jpg", + width: "480", height: "360") + end + end + end + end + end + + env.response.content_type = "application/atom+xml" + feed +end + +# Channels + +get "/user/:user" do |env| + user = env.params.url["user"] + env.redirect "/channel/#{user}" +end + +get "/channel/:ucid" do |env| + user = env.get? "user" + if user + user = user.as(User) + subscriptions = user.subscriptions + end + subscriptions ||= [] of String + + ucid = env.params.url["ucid"] + + page = env.params.query["page"]?.try &.to_i? 
+ page ||= 1 + + client = make_client(YT_URL) + + if !ucid.match(/UC[a-zA-Z0-9_-]{22}/) + rss = client.get("/feeds/videos.xml?user=#{ucid}").body + rss = XML.parse_html(rss) + + ucid = rss.xpath_node("//feed/channelid") + if ucid + ucid = ucid.content + else + error_message = "User does not exist" + next templated "error" + end + + env.redirect "/channel/#{ucid}" + end + + url = produce_playlist_url(ucid, (page - 1) * 100) + response = client.get(url) + + json = JSON.parse(response.body) + if !json["content_html"]? || json["content_html"].as_s.empty? + error_message = "This channel does not exist or has no videos." + next templated "error" + end + + if json["content_html"].as_s.strip(" \n").empty? + rss = client.get("/feeds/videos.xml?channel_id=#{ucid}").body + rss = XML.parse_html(rss) + author = rss.xpath_node("//feed/author/name").not_nil!.content + + videos = [] of ChannelVideo + + next templated "channel" + end + + document = XML.parse_html(json["content_html"].as_s) + author = document.xpath_node(%q(//div[@class="pl-video-owner"]/a)).not_nil!.content + + videos = [] of ChannelVideo + document.xpath_nodes(%q(//a[contains(@class,"pl-video-title-link")])).each do |node| + href = URI.parse(node["href"]) + id = HTTP::Params.parse(href.query.not_nil!)["v"] + title = node.content + + videos << ChannelVideo.new(id, title, Time.now, Time.now, ucid, author) + end + + templated "channel" +end + +get "/channel/:ucid/videos" do |env| + ucid = env.params.url["ucid"] + params = env.request.query + + if !params || params.empty? + params = "" + else + params = "?#{params}" + end + + env.redirect "/channel/#{ucid}#{params}" +end + +# API Endpoints + get "/api/v1/captions/:id" do |env| id = env.params.url["id"] @@ -1392,1360 +2659,6 @@ get "/api/v1/search" do |env| results end -get "/embed/:id" do |env| - if env.params.url["id"]? - id = env.params.url["id"] - else - next env.redirect "/" - end - - if env.params.query["start"]? 
- video_start = decode_time(env.params.query["start"]) - end - - if env.params.query["t"]? - video_start = decode_time(env.params.query["t"]) - end - video_start ||= 0 - - if env.params.query["end"]? - video_end = decode_time(env.params.query["end"]) - end - video_end ||= -1 - - if env.params.query["listen"]? && env.params.query["listen"] == "true" - listen = true - env.params.query.delete_all("listen") - end - listen ||= false - - raw = env.params.query["raw"]?.try &.to_i? - raw ||= 0 - raw = raw == 1 - - quality = env.params.query["quality"]? - quality ||= "hd720" - - autoplay = env.params.query["autoplay"]?.try &.to_i? - autoplay ||= 0 - autoplay = autoplay == 1 - - controls = env.params.query["controls"]?.try &.to_i? - controls ||= 1 - controls = controls == 1 - - video_loop = env.params.query["loop"]?.try &.to_i? - video_loop ||= 0 - video_loop = video_loop == 1 - - begin - video = get_video(id, PG_DB) - rescue ex - error_message = ex.message - next templated "error" - end - - player_response = JSON.parse(video.info["player_response"]) - if player_response["captions"]? - captions = player_response["captions"]["playerCaptionsTracklistRenderer"]["captionTracks"]?.try &.as_a - end - captions ||= [] of JSON::Any - - if video.info["hlsvp"]? - hlsvp = video.info["hlsvp"] - - if Kemal.config.ssl || CONFIG.https_only - scheme = "https://" - else - scheme = "http://" - end - host = env.request.headers["Host"] - url = "#{scheme}#{host}" - - hlsvp = hlsvp.gsub("https://manifest.googlevideo.com", url) - end - - fmt_stream = [] of HTTP::Params - video.info["url_encoded_fmt_stream_map"].split(",") do |string| - if !string.empty? 
- fmt_stream << HTTP::Params.parse(string) - end - end - - fmt_stream.each { |s| s.add("label", "#{s["quality"]} - #{s["type"].split(";")[0].split("/")[1]}") } - fmt_stream = fmt_stream.uniq { |s| s["label"] } - - adaptive_fmts = [] of HTTP::Params - if video.info.has_key?("adaptive_fmts") - video.info["adaptive_fmts"].split(",") do |string| - adaptive_fmts << HTTP::Params.parse(string) - end - end - - if adaptive_fmts[0]? && adaptive_fmts[0]["s"]? - adaptive_fmts.each do |fmt| - fmt["url"] += "&signature=" + decrypt_signature(fmt["s"], decrypt_function) - end - - fmt_stream.each do |fmt| - fmt["url"] += "&signature=" + decrypt_signature(fmt["s"], decrypt_function) - end - end - - audio_streams = adaptive_fmts.compact_map { |s| s["type"].starts_with?("audio") ? s : nil } - audio_streams.sort_by! { |s| s["bitrate"].to_i }.reverse! - audio_streams.each do |stream| - stream["bitrate"] = (stream["bitrate"].to_f64/1000).to_i.to_s - end - - if raw - url = fmt_stream[0]["url"] - - fmt_stream.each do |fmt| - if fmt["label"].split(" - ")[0] == quality - url = fmt["url"] - end - end - - next env.redirect url - end - - video.description = fill_links(video.description, "https", "www.youtube.com") - video.description = add_alt_links(video.description) - - description = video.description.gsub("
", " ") - description = description.gsub("
", " ") - description = XML.parse_html(description).content[0..200].gsub('"', """).gsub("\n", " ").strip(" ") - if description.empty? - description = " " - end - - if Kemal.config.ssl || CONFIG.https_only - scheme = "https://" - else - scheme = "http://" - end - host = env.request.headers["Host"] - host_url = "#{scheme}#{host}" - host_params = env.request.query_params - host_params.delete_all("v") - - if fmt_stream.select { |x| x["label"].starts_with? "hd720" }.size != 0 - thumbnail = "https://i.ytimg.com/vi/#{video.id}/maxresdefault.jpg" - else - thumbnail = "https://i.ytimg.com/vi/#{video.id}/hqdefault.jpg" - end - - rendered "embed" -end - -get "/results" do |env| - search_query = env.params.query["search_query"]? - if search_query - env.redirect "/search?q=#{URI.escape(search_query)}" - else - env.redirect "/" - end -end - -get "/search" do |env| - if env.params.query["q"]? - query = env.params.query["q"] - else - next env.redirect "/" - end - - page = env.params.query["page"]?.try &.to_i? - page ||= 1 - - videos = search(query, page) - - templated "search" -end - -get "/login" do |env| - user = env.get? "user" - if user - next env.redirect "/feed/subscriptions" - end - - referer = env.request.headers["referer"]? - referer ||= "/feed/subscriptions" - - account_type = env.params.query["type"]? - account_type ||= "invidious" - - if account_type == "invidious" - captcha = generate_captcha(HMAC_KEY) - end - - tfa = env.params.query["tfa"]? - tfa ||= false - - if referer.ends_with? "/login" - referer = "/feed/subscriptions" - end - - if referer.size > 64 - referer = "/feed/subscriptions" - end - - templated "login" -end - -# See https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/youtube.py#L79 -post "/login" do |env| - referer = env.params.query["referer"]? - referer ||= "/feed/subscriptions" - - email = env.params.body["email"]? - password = env.params.body["password"]? - - account_type = env.params.query["type"]? 
- account_type ||= "google" - - if account_type == "google" - tfa_code = env.params.body["tfa"]?.try &.lchop("G-") - - begin - client = make_client(LOGIN_URL) - headers = HTTP::Headers.new - headers["Content-Type"] = "application/x-www-form-urlencoded;charset=utf-8" - headers["Google-Accounts-XSRF"] = "1" - - login_page = client.get("/ServiceLogin") - headers = login_page.cookies.add_request_headers(headers) - - login_page = XML.parse_html(login_page.body) - - inputs = {} of String => String - login_page.xpath_nodes(%q(//input[@type="submit"])).each do |node| - name = node["id"]? || node["name"]? - name ||= "" - value = node["value"]? - value ||= "" - - if name != "" && value != "" - inputs[name] = value - end - end - - login_page.xpath_nodes(%q(//input[@type="hidden"])).each do |node| - name = node["id"]? || node["name"]? - name ||= "" - value = node["value"]? - value ||= "" - - if name != "" && value != "" - inputs[name] = value - end - end - - lookup_req = %(["#{email}",null,[],null,"US",null,null,2,false,true,[null,null,[2,1,null,1,"https://accounts.google.com/ServiceLogin?passive=1209600&continue=https%3A%2F%2Faccounts.google.com%2FManageAccount&followup=https%3A%2F%2Faccounts.google.com%2FManageAccount",null,[],4,[]],1,[null,null,[]],null,null,null,true],"#{email}"]) - - lookup_results = client.post("/_/signin/sl/lookup", headers, login_req(inputs, lookup_req)) - headers = lookup_results.cookies.add_request_headers(headers) - - lookup_results = lookup_results.body - lookup_results = lookup_results[5..-1] - lookup_results = JSON.parse(lookup_results) - - user_hash = lookup_results[0][2] - - challenge_req = %(["#{user_hash}",null,1,null,[1,null,null,null,["#{password}",null,true]],[null,null,[2,1,null,1,"https://accounts.google.com/ServiceLogin?passive=1209600&continue=https%3A%2F%2Faccounts.google.com%2FManageAccount&followup=https%3A%2F%2Faccounts.google.com%2FManageAccount",null,[],4,[]],1,[null,null,[]],null,null,null,true]]) - - challenge_results = 
client.post("/_/signin/sl/challenge", headers, login_req(inputs, challenge_req)) - headers = challenge_results.cookies.add_request_headers(headers) - - challenge_results = challenge_results.body - challenge_results = challenge_results[5..-1] - challenge_results = JSON.parse(challenge_results) - - headers["Cookie"] = URI.unescape(headers["Cookie"]) - - if challenge_results[0][-1]?.try &.[5] == "INCORRECT_ANSWER_ENTERED" - error_message = "Incorrect password" - next templated "error" - end - - if challenge_results[0][-1][0].as_a? - # Prefer Authenticator app and SMS over unsupported protocols - if challenge_results[0][-1][0][0][8] != 6 || challenge_results[0][-1][0][0][8] != 9 - tfa = challenge_results[0][-1][0].as_a.select { |auth_type| auth_type[8] == 6 || auth_type[8] == 9 }[0] - select_challenge = "[#{challenge_results[0][-1][0].as_a.index(tfa).not_nil!}]" - - tl = challenge_results[1][2] - - tfa = client.post("/_/signin/selectchallenge?TL=#{tl}", headers, login_req(inputs, select_challenge)).body - tfa = tfa[5..-1] - tfa = JSON.parse(tfa)[0][-1] - else - tfa = challenge_results[0][-1][0][0] - end - - if tfa[2] == "TWO_STEP_VERIFICATION" - if tfa[5] == "QUOTA_EXCEEDED" - error_message = "Quota exceeded, try again in a few hours" - next templated "error" - end - - if !tfa_code - next env.redirect "/login?tfa=true&type=google" - end - - tl = challenge_results[1][2] - - request_type = tfa[8] - case request_type - when 6 - # Authenticator app - tfa_req = %(["#{user_hash}",null,2,null,[6,null,null,null,null,["#{tfa_code}",false]]]) - when 9 - # Voice or text message - tfa_req = %(["#{user_hash}",null,2,null,[9,null,null,null,null,null,null,null,[null,"#{tfa_code}",false,2]]]) - else - error_message = "Unable to login, make sure two-factor authentication (Authenticator or SMS) is enabled." 
- next templated "error" - end - - challenge_results = client.post("/_/signin/challenge?hl=en&TL=#{tl}", headers, login_req(inputs, tfa_req)) - headers = challenge_results.cookies.add_request_headers(headers) - - challenge_results = challenge_results.body - challenge_results = challenge_results[5..-1] - challenge_results = JSON.parse(challenge_results) - - if challenge_results[0][-1]?.try &.[5] == "INCORRECT_ANSWER_ENTERED" - error_message = "Invalid TFA code" - next templated "error" - end - end - end - - login_res = challenge_results[0][13][2].to_s - - login = client.get(login_res, headers) - headers = login.cookies.add_request_headers(headers) - - login = client.get(login.headers["Location"], headers) - - headers = HTTP::Headers.new - headers = login.cookies.add_request_headers(headers) - - sid = login.cookies["SID"].value - - client = make_client(YT_URL) - user = get_user(sid, client, headers, PG_DB) - - # We are now logged in - - host = URI.parse(env.request.headers["Host"]).host - - login.cookies.each do |cookie| - if Kemal.config.ssl || CONFIG.https_only - cookie.secure = true - else - cookie.secure = false - end - - cookie.extension = cookie.extension.not_nil!.gsub(".youtube.com", host) - cookie.extension = cookie.extension.not_nil!.gsub("Secure; ", "") - end - - login.cookies.add_response_headers(env.response.headers) - - env.redirect referer - rescue ex - error_message = "Login failed. This may be because two-factor authentication is not enabled on your account." - next templated "error" - end - elsif account_type == "invidious" - challenge_response = env.params.body["challenge_response"]? - token = env.params.body["token"]? - - action = env.params.body["action"]? 
- action ||= "signin" - - if !email - error_message = "User ID is a required field" - next templated "error" - end - - if !password - error_message = "Password is a required field" - next templated "error" - end - - if !challenge_response || !token - error_message = "CAPTCHA is a required field" - next templated "error" - end - - challenge_response = challenge_response.lstrip('0') - if OpenSSL::HMAC.digest(:sha256, HMAC_KEY, challenge_response) == Base64.decode(token) - else - error_message = "Invalid CAPTCHA response" - next templated "error" - end - - if action == "signin" - user = PG_DB.query_one?("SELECT * FROM users WHERE email = $1 AND password IS NOT NULL", email, as: User) - - if !user - error_message = "Invalid username or password" - next templated "error" - end - - if !user.password - error_message = "Please sign in using 'Sign in with Google'" - next templated "error" - end - - if Crypto::Bcrypt::Password.new(user.password.not_nil!) == password - sid = Base64.encode(Random::Secure.random_bytes(50)) - PG_DB.exec("UPDATE users SET id = $1 WHERE email = $2", sid, email) - - if Kemal.config.ssl || CONFIG.https_only - secure = true - else - secure = false - end - - env.response.cookies["SID"] = HTTP::Cookie.new(name: "SID", value: sid, expires: Time.now + 2.years, - secure: secure, http_only: true) - else - error_message = "Invalid username or password" - next templated "error" - end - elsif action == "register" - user = PG_DB.query_one?("SELECT * FROM users WHERE email = $1 AND password IS NOT NULL", email, as: User) - if user - error_message = "Please sign in" - next templated "error" - end - - sid = Base64.encode(Random::Secure.random_bytes(50)) - user = create_user(sid, email, password) - user_array = user.to_a - - user_array[5] = user_array[5].to_json - args = arg_array(user_array) - - PG_DB.exec("INSERT INTO users VALUES (#{args})", user_array) - - if Kemal.config.ssl || CONFIG.https_only - secure = true - else - secure = false - end - - 
env.response.cookies["SID"] = HTTP::Cookie.new(name: "SID", value: sid, expires: Time.now + 2.years, - secure: secure, http_only: true) - end - - env.redirect referer - end -end - -get "/signout" do |env| - referer = env.request.headers["referer"]? - referer ||= "/" - - env.request.cookies.each do |cookie| - cookie.expires = Time.new(1990, 1, 1) - end - - env.request.cookies.add_response_headers(env.response.headers) - env.redirect referer -end - -get "/preferences" do |env| - user = env.get? "user" - - referer = env.request.headers["referer"]? - referer ||= "/preferences" - - if referer.size > 64 - referer = "/preferences" - end - - if user - user = user.as(User) - templated "preferences" - else - env.redirect referer - end -end - -post "/preferences" do |env| - user = env.get? "user" - - referer = env.params.query["referer"]? - referer ||= "/preferences" - - if user - user = user.as(User) - - video_loop = env.params.body["video_loop"]?.try &.as(String) - video_loop ||= "off" - video_loop = video_loop == "on" - - autoplay = env.params.body["autoplay"]?.try &.as(String) - autoplay ||= "off" - autoplay = autoplay == "on" - - speed = env.params.body["speed"]?.try &.as(String).to_f? - speed ||= 1.0 - - quality = env.params.body["quality"]?.try &.as(String) - quality ||= "hd720" - - volume = env.params.body["volume"]?.try &.as(String).to_i? - volume ||= 100 - - comments = env.params.body["comments"]? - comments ||= "youtube" - - redirect_feed = env.params.body["redirect_feed"]?.try &.as(String) - redirect_feed ||= "off" - redirect_feed = redirect_feed == "on" - - dark_mode = env.params.body["dark_mode"]?.try &.as(String) - dark_mode ||= "off" - dark_mode = dark_mode == "on" - - thin_mode = env.params.body["thin_mode"]?.try &.as(String) - thin_mode ||= "off" - thin_mode = thin_mode == "on" - - max_results = env.params.body["max_results"]?.try &.as(String).to_i? 
- max_results ||= 40 - - sort = env.params.body["sort"]?.try &.as(String) - sort ||= "published" - - latest_only = env.params.body["latest_only"]?.try &.as(String) - latest_only ||= "off" - latest_only = latest_only == "on" - - unseen_only = env.params.body["unseen_only"]?.try &.as(String) - unseen_only ||= "off" - unseen_only = unseen_only == "on" - - notifications_only = env.params.body["notifications_only"]?.try &.as(String) - notifications_only ||= "off" - notifications_only = notifications_only == "on" - - preferences = { - "video_loop" => video_loop, - "autoplay" => autoplay, - "speed" => speed, - "quality" => quality, - "volume" => volume, - "comments" => comments, - "redirect_feed" => redirect_feed, - "dark_mode" => dark_mode, - "thin_mode" => thin_mode, - "max_results" => max_results, - "sort" => sort, - "latest_only" => latest_only, - "unseen_only" => unseen_only, - "notifications_only" => notifications_only, - }.to_json - - PG_DB.exec("UPDATE users SET preferences = $1 WHERE email = $2", preferences, user.email) - end - - env.redirect referer -end - -# Get subscriptions for authorized user -get "/feed/subscriptions" do |env| - user = env.get? "user" - - if user - user = user.as(User) - preferences = user.preferences - - # Refresh account - headers = HTTP::Headers.new - headers["Cookie"] = env.request.headers["Cookie"] - - if !user.password - client = make_client(YT_URL) - user = get_user(user.id, client, headers, PG_DB) - end - - max_results = preferences.max_results - max_results ||= env.params.query["max_results"]?.try &.to_i? - max_results ||= 40 - - page = env.params.query["page"]?.try &.to_i? - page ||= 1 - - if max_results < 0 - limit = nil - offset = (page - 1) * 1 - else - limit = max_results - offset = (page - 1) * max_results - end - - notifications = PG_DB.query_one("SELECT notifications FROM users WHERE email = $1", user.email, - as: Array(String)) - if preferences.notifications_only && !notifications.empty? 
- args = arg_array(notifications) - - videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE id IN (#{args}) - ORDER BY published DESC", notifications, as: ChannelVideo) - notifications = [] of ChannelVideo - - videos.sort_by! { |video| video.published }.reverse! - - case preferences.sort - when "alphabetically" - videos.sort_by! { |video| video.title } - when "alphabetically - reverse" - videos.sort_by! { |video| video.title }.reverse! - when "channel name" - videos.sort_by! { |video| video.author } - when "channel name - reverse" - videos.sort_by! { |video| video.author }.reverse! - end - else - if preferences.latest_only - if preferences.unseen_only - ucids = arg_array(user.subscriptions) - if user.watched.empty? - watched = "'{}'" - else - watched = arg_array(user.watched, user.subscriptions.size + 1) - end - - videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \ - ucid IN (#{ucids}) AND id NOT IN (#{watched}) ORDER BY ucid, published DESC", - user.subscriptions + user.watched, as: ChannelVideo) - else - args = arg_array(user.subscriptions) - videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \ - ucid IN (#{args}) ORDER BY ucid, published DESC", user.subscriptions, as: ChannelVideo) - end - - videos.sort_by! { |video| video.published }.reverse! - else - if preferences.unseen_only - ucids = arg_array(user.subscriptions, 3) - if user.watched.empty? 
- watched = "'{}'" - else - watched = arg_array(user.watched, user.subscriptions.size + 3) - end - - videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{ucids}) \ - AND id NOT IN (#{watched}) ORDER BY published DESC LIMIT $1 OFFSET $2", - [limit, offset] + user.subscriptions + user.watched, as: ChannelVideo) - else - args = arg_array(user.subscriptions, 3) - videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{args}) \ - ORDER BY published DESC LIMIT $1 OFFSET $2", [limit, offset] + user.subscriptions, as: ChannelVideo) - end - end - - case preferences.sort - when "alphabetically" - videos.sort_by! { |video| video.title } - when "alphabetically - reverse" - videos.sort_by! { |video| video.title }.reverse! - when "channel name" - videos.sort_by! { |video| video.author } - when "channel name - reverse" - videos.sort_by! { |video| video.author }.reverse! - end - - # TODO: Add option to disable picking out notifications from regular feed - notifications = PG_DB.query_one("SELECT notifications FROM users WHERE email = $1", user.email, - as: Array(String)) - - notifications = videos.select { |v| notifications.includes? 
v.id } - videos = videos - notifications - end - - if !limit - videos = videos[0..max_results] - end - - PG_DB.exec("UPDATE users SET notifications = $1, updated = $2 WHERE id = $3", [] of String, Time.now, - user.id) - user.notifications = [] of String - env.set "user", user - - templated "subscriptions" - else - env.redirect "/" - end -end - -get "/feed/channel/:ucid" do |env| - ucid = env.params.url["ucid"] - - client = make_client(YT_URL) - if !ucid.match(/UC[a-zA-Z0-9_-]{22}/) - rss = client.get("/feeds/videos.xml?user=#{ucid}").body - rss = XML.parse_html(rss) - - ucid = rss.xpath_node("//feed/channelid") - if ucid - ucid = ucid.content - else - env.response.content_type = "application/json" - next {"error" => "User does not exist"}.to_json - end - end - - url = produce_videos_url(ucid) - response = client.get(url) - - channel = get_channel(ucid, client, PG_DB, pull_all_videos: false) - - json = JSON.parse(response.body) - if !json["content_html"]? || json["content_html"].as_s.empty? - error_message = "This channel does not exist or has no videos." - next templated "error" - end - - content_html = json["content_html"].as_s - if content_html.empty? 
- halt env, status_code: 403 - end - document = XML.parse_html(content_html) - - if Kemal.config.ssl || CONFIG.https_only - scheme = "https://" - else - scheme = "http://" - end - host = env.request.headers["Host"] - path = env.request.path - - feed = XML.build(indent: " ", encoding: "UTF-8") do |xml| - xml.element("feed", "xmlns:yt": "http://www.youtube.com/xml/schemas/2015", - "xmlns:media": "http://search.yahoo.com/mrss/", xmlns: "http://www.w3.org/2005/Atom") do - xml.element("link", rel: "self", href: "#{scheme}#{host}#{path}") - xml.element("id") { xml.text "yt:channel:#{ucid}" } - xml.element("yt:channelId") { xml.text ucid } - xml.element("title") { xml.text channel.author } - xml.element("link", rel: "alternate", href: "#{scheme}#{host}/channel/#{ucid}") - - xml.element("author") do - xml.element("name") { xml.text channel.author } - xml.element("uri") { xml.text "#{scheme}#{host}/channel/#{ucid}" } - end - - document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])).each do |node| - anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a)).not_nil! - title = anchor.content.strip - video_id = anchor["href"].lchop("/watch?v=") - - view_count = node.xpath_node(%q(.//div[@class="yt-lockup-meta"]/ul/li[2])).not_nil! - view_count = view_count.content.rchop(" views") - if view_count = "No" - view_count = 0 - else - view_count = view_count.delete(",").to_i - end - - descriptionHtml = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")])) - if !descriptionHtml - description = "" - descriptionHtml = "" - else - descriptionHtml = descriptionHtml.to_s - description = descriptionHtml.gsub("
", "\n") - description = description.gsub("
", "\n") - description = XML.parse_html(description).content.strip("\n ") - end - - published = node.xpath_node(%q(.//div[@class="yt-lockup-meta"]/ul/li[1])) - if !published - next - end - published = published.content - published = decode_date(published) - - xml.element("entry") do - xml.element("id") { xml.text "yt:video:#{video_id}" } - xml.element("yt:videoId") { xml.text video_id } - xml.element("yt:channelId") { xml.text ucid } - xml.element("title") { xml.text title } - xml.element("link", rel: "alternate", href: "#{scheme}#{host}/watch?v=#{video_id}") - - xml.element("author") do - xml.element("name") { xml.text channel.author } - xml.element("uri") { xml.text "#{scheme}#{host}/channel/#{ucid}" } - end - - xml.element("published") { xml.text published.to_s("%Y-%m-%dT%H:%M:%S%:z") } - - xml.element("media:group") do - xml.element("media:title") { xml.text title } - xml.element("media:thumbnail", url: "https://i.ytimg.com/vi/#{video_id}/hqdefault.jpg", - width: "480", height: "360") - xml.element("media:description") { xml.text description } - end - - xml.element("media:community") do - xml.element("media:statistics", views: view_count) - end - end - end - end - end - - env.response.content_type = "text/xml" - feed -end - -get "/feed/private" do |env| - token = env.params.query["token"]? - - if !token - halt env, status_code: 403 - end - - user = PG_DB.query_one?("SELECT * FROM users WHERE token = $1", token.strip, as: User) - if !user - halt env, status_code: 403 - end - - max_results = env.params.query["max_results"]?.try &.to_i? - max_results ||= 40 - - page = env.params.query["page"]?.try &.to_i? - page ||= 1 - - if max_results < 0 - limit = nil - offset = (page - 1) * 1 - else - limit = max_results - offset = (page - 1) * max_results - end - - latest_only = env.params.query["latest_only"]?.try &.to_i? 
- latest_only ||= 0 - latest_only = latest_only == 1 - - if latest_only - args = arg_array(user.subscriptions) - videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \ - ucid IN (#{args}) ORDER BY ucid, published DESC", user.subscriptions, as: ChannelVideo) - videos.sort_by! { |video| video.published }.reverse! - else - args = arg_array(user.subscriptions, 3) - videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{args}) \ - ORDER BY published DESC LIMIT $1 OFFSET $2", [limit, offset] + user.subscriptions, as: ChannelVideo) - end - - sort = env.params.query["sort"]? - sort ||= "published" - - case sort - when "alphabetically" - videos.sort_by! { |video| video.title } - when "reverse_alphabetically" - videos.sort_by! { |video| video.title }.reverse! - when "channel_name" - videos.sort_by! { |video| video.author } - when "reverse_channel_name" - videos.sort_by! { |video| video.author }.reverse! - end - - if Kemal.config.ssl || CONFIG.https_only - scheme = "https://" - else - scheme = "http://" - end - - if !limit - videos = videos[0..max_results] - end - - host = env.request.headers["Host"] - path = env.request.path - query = env.request.query.not_nil! 
- - feed = XML.build(indent: " ", encoding: "UTF-8") do |xml| - xml.element("feed", xmlns: "http://www.w3.org/2005/Atom", "xmlns:media": "http://search.yahoo.com/mrss/", - "xml:lang": "en-US") do - xml.element("link", "type": "text/html", rel: "alternate", href: "#{scheme}#{host}/feed/subscriptions") - xml.element("link", "type": "application/atom+xml", rel: "self", href: "#{scheme}#{host}#{path}?#{query}") - xml.element("title") { xml.text "Invidious Private Feed for #{user.email}" } - - videos.each do |video| - xml.element("entry") do - xml.element("id") { xml.text "yt:video:#{video.id}" } - xml.element("yt:videoId") { xml.text video.id } - xml.element("yt:channelId") { xml.text video.ucid } - xml.element("title") { xml.text video.title } - xml.element("link", rel: "alternate", href: "#{scheme}#{host}/watch?v=#{video.id}") - - xml.element("author") do - xml.element("name") { xml.text video.author } - xml.element("uri") { xml.text "#{scheme}#{host}/channel/#{video.ucid}" } - end - - xml.element("published") { xml.text video.published.to_s("%Y-%m-%dT%H:%M:%S%:z") } - xml.element("updated") { xml.text video.updated.to_s("%Y-%m-%dT%H:%M:%S%:z") } - - xml.element("media:group") do - xml.element("media:title") { xml.text video.title } - xml.element("media:thumbnail", url: "https://i.ytimg.com/vi/#{video.id}/hqdefault.jpg", - width: "480", height: "360") - end - end - end - end - end - - env.response.content_type = "application/atom+xml" - feed -end - -# Function that is useful if you have multiple channels that don't have -# the bell dinged. Request parameters are fairly self-explanatory, -# receive_all_updates = true and receive_post_updates = true will ding all -# channels. Calling /modify_notifications without any arguments will -# request all notifications from all channels. -# /modify_notifications?receive_all_updates=false&receive_no_updates=false -# will "unding" all subscriptions. -get "/modify_notifications" do |env| - user = env.get? 
"user" - - referer = env.request.headers["referer"]? - referer ||= "/" - - if user - user = user.as(User) - - channel_req = {} of String => String - - channel_req["receive_all_updates"] = env.params.query["receive_all_updates"]? || "true" - channel_req["receive_no_updates"] = env.params.query["receive_no_updates"]? || "" - channel_req["receive_post_updates"] = env.params.query["receive_post_updates"]? || "true" - - channel_req.reject! { |k, v| v != "true" && v != "false" } - - headers = HTTP::Headers.new - headers["Cookie"] = env.request.headers["Cookie"] - - client = make_client(YT_URL) - subs = client.get("/subscription_manager?disable_polymer=1", headers) - headers["Cookie"] += "; " + subs.cookies.add_request_headers(headers)["Cookie"] - match = subs.body.match(/'XSRF_TOKEN': "(?[A-Za-z0-9\_\-\=]+)"/) - if match - session_token = match["session_token"] - else - next env.redirect referer - end - - channel_req["session_token"] = session_token - - headers["content-type"] = "application/x-www-form-urlencoded" - subs = XML.parse_html(subs.body) - subs.xpath_nodes(%q(//a[@class="subscription-title yt-uix-sessionlink"]/@href)).each do |channel| - channel_id = channel.content.lstrip("/channel/").not_nil! - - channel_req["channel_id"] = channel_id - - client.post("/subscription_ajax?action_update_subscription_preferences=1", headers, - HTTP::Params.encode(channel_req)).body - end - end - - env.redirect referer -end - -get "/subscription_manager" do |env| - user = env.get? "user" - - if !user - next env.redirect "/" - end - - user = user.as(User) - - if !user.password - # Refresh account - headers = HTTP::Headers.new - headers["Cookie"] = env.request.headers["Cookie"] - - client = make_client(YT_URL) - user = get_user(user.id, client, headers, PG_DB) - end - - action_takeout = env.params.query["action_takeout"]?.try &.to_i? - action_takeout ||= 0 - action_takeout = action_takeout == 1 - - format = env.params.query["format"]? 
- format ||= "rss" - - client = make_client(YT_URL) - - subscriptions = [] of InvidiousChannel - user.subscriptions.each do |ucid| - begin - subscriptions << get_channel(ucid, client, PG_DB, false) - rescue ex - next - end - end - subscriptions.sort_by! { |channel| channel.author.downcase } - - if action_takeout - if Kemal.config.ssl || CONFIG.https_only - scheme = "https://" - else - scheme = "http://" - end - host = env.request.headers["Host"] - - url = "#{scheme}#{host}" - - if format == "json" - env.response.content_type = "application/json" - env.response.headers["content-disposition"] = "attachment" - next { - "subscriptions" => user.subscriptions, - "watch_history" => user.watched, - "preferences" => user.preferences, - }.to_json - else - env.response.content_type = "application/xml" - env.response.headers["content-disposition"] = "attachment" - export = XML.build do |xml| - xml.element("opml", version: "1.1") do - xml.element("body") do - if format == "newpipe" - title = "YouTube Subscriptions" - else - title = "Invidious Subscriptions" - end - - xml.element("outline", text: title, title: title) do - subscriptions.each do |channel| - if format == "newpipe" - xmlUrl = "https://www.youtube.com/feeds/videos.xml?channel_id=#{channel.id}" - else - xmlUrl = "#{url}/feed/channel/#{channel.id}" - end - - xml.element("outline", text: channel.author, title: channel.author, - "type": "rss", xmlUrl: xmlUrl) - end - end - end - end - end - - next export.gsub(%(\n), "") - end - end - - templated "subscription_manager" -end - -get "/data_control" do |env| - user = env.get? "user" - referer = env.request.headers["referer"]? - referer ||= "/" - - if user - user = user.as(User) - - templated "data_control" - else - env.redirect referer - end -end - -post "/data_control" do |env| - user = env.get? "user" - referer = env.request.headers["referer"]? 
- referer ||= "/" - - if user - user = user.as(User) - - HTTP::FormData.parse(env.request) do |part| - body = part.body.gets_to_end - if body.empty? - next - end - - case part.name - when "import_invidious" - body = JSON.parse(body) - body["subscriptions"].as_a.each do |ucid| - ucid = ucid.as_s - if !user.subscriptions.includes? ucid - PG_DB.exec("UPDATE users SET subscriptions = array_append(subscriptions,$1) WHERE id = $2", ucid, user.id) - - begin - client = make_client(YT_URL) - get_channel(ucid, client, PG_DB, false, false) - rescue ex - next - end - end - end - - body["watch_history"].as_a.each do |id| - id = id.as_s - if !user.watched.includes? id - PG_DB.exec("UPDATE users SET watched = array_append(watched,$1) WHERE id = $2", id, user.id) - end - end - - PG_DB.exec("UPDATE users SET preferences = $1 WHERE id = $2", body["preferences"].to_json, user.id) - when "import_youtube" - subscriptions = XML.parse(body) - subscriptions.xpath_nodes(%q(//outline[@type="rss"])).each do |channel| - ucid = channel["xmlUrl"].match(/UC[a-zA-Z0-9_-]{22}/).not_nil![0] - - if !user.subscriptions.includes? ucid - PG_DB.exec("UPDATE users SET subscriptions = array_append(subscriptions,$1) WHERE id = $2", ucid, user.id) - - begin - client = make_client(YT_URL) - get_channel(ucid, client, PG_DB, false, false) - rescue ex - next - end - end - end - when "import_newpipe_subscriptions" - body = JSON.parse(body) - body["subscriptions"].as_a.each do |channel| - ucid = channel["url"].as_s.match(/UC[a-zA-Z0-9_-]{22}/).not_nil![0] - - if !user.subscriptions.includes? 
ucid - PG_DB.exec("UPDATE users SET subscriptions = array_append(subscriptions,$1) WHERE id = $2", ucid, user.id) - - begin - client = make_client(YT_URL) - get_channel(ucid, client, PG_DB, false, false) - rescue ex - next - end - end - end - when "import_newpipe" - Zip::Reader.open(body) do |file| - file.each_entry do |entry| - if entry.filename == "newpipe.db" - # We do this because the SQLite driver cannot parse a database from an IO - # Currently: channel URLs can **only** be subscriptions, and - # video URLs can **only** be watch history, so this works okay for now. - - db = entry.io.gets_to_end - db.scan(/youtube\.com\/watch\?v\=(?[a-zA-Z0-9_-]{11})/) do |md| - if !user.watched.includes? md["id"] - PG_DB.exec("UPDATE users SET watched = array_append(watched,$1) WHERE id = $2", md["id"], user.id) - end - end - - db.scan(/youtube\.com\/channel\/(?[a-zA-Z0-9_-]{22})/) do |md| - ucid = md["ucid"] - if !user.subscriptions.includes? ucid - PG_DB.exec("UPDATE users SET subscriptions = array_append(subscriptions,$1) WHERE id = $2", ucid, user.id) - - begin - client = make_client(YT_URL) - get_channel(ucid, client, PG_DB, false, false) - rescue ex - next - end - end - end - end - end - end - end - end - end - - env.redirect referer -end - -get "/subscription_ajax" do |env| - user = env.get? "user" - referer = env.request.headers["referer"]? - referer ||= "/" - - if user - user = user.as(User) - - if env.params.query["action_create_subscription_to_channel"]? - action = "action_create_subscription_to_channel" - elsif env.params.query["action_remove_subscriptions"]? - action = "action_remove_subscriptions" - else - next env.redirect referer - end - - channel_id = env.params.query["c"]? 
- channel_id ||= "" - - if !user.password - headers = HTTP::Headers.new - headers["Cookie"] = env.request.headers["Cookie"] - - client = make_client(YT_URL) - subs = client.get("/subscription_manager?disable_polymer=1", headers) - headers["Cookie"] += "; " + subs.cookies.add_request_headers(headers)["Cookie"] - match = subs.body.match(/'XSRF_TOKEN': "(?[A-Za-z0-9\_\-\=]+)"/) - if match - session_token = match["session_token"] - else - next env.redirect "/" - end - - headers["content-type"] = "application/x-www-form-urlencoded" - - post_req = { - "session_token" => session_token, - } - post_req = HTTP::Params.encode(post_req) - post_url = "/subscription_ajax?#{action}=1&c=#{channel_id}" - - # Update user - if client.post(post_url, headers, post_req).status_code == 200 - sid = user.id - - case action - when .starts_with? "action_create" - PG_DB.exec("UPDATE users SET subscriptions = array_append(subscriptions,$1) WHERE id = $2", channel_id, sid) - when .starts_with? "action_remove" - PG_DB.exec("UPDATE users SET subscriptions = array_remove(subscriptions,$1) WHERE id = $2", channel_id, sid) - end - end - else - sid = user.id - - case action - when .starts_with? "action_create" - if !user.subscriptions.includes? channel_id - PG_DB.exec("UPDATE users SET subscriptions = array_append(subscriptions,$1) WHERE id = $2", channel_id, sid) - - client = make_client(YT_URL) - get_channel(channel_id, client, PG_DB, false, false) - end - when .starts_with? "action_remove" - PG_DB.exec("UPDATE users SET subscriptions = array_remove(subscriptions,$1) WHERE id = $2", channel_id, sid) - end - end - end - - env.redirect referer -end - -get "/clear_watch_history" do |env| - user = env.get? "user" - referer = env.request.headers["referer"]? 
- referer ||= "/" - - if user - user = user.as(User) - - PG_DB.exec("UPDATE users SET watched = '{}' WHERE id = $1", user.id) - end - - env.redirect referer -end - -get "/user/:user" do |env| - user = env.params.url["user"] - env.redirect "/channel/#{user}" -end - -get "/channel/:ucid" do |env| - user = env.get? "user" - if user - user = user.as(User) - subscriptions = user.subscriptions - end - subscriptions ||= [] of String - - ucid = env.params.url["ucid"] - - page = env.params.query["page"]?.try &.to_i? - page ||= 1 - - client = make_client(YT_URL) - - if !ucid.match(/UC[a-zA-Z0-9_-]{22}/) - rss = client.get("/feeds/videos.xml?user=#{ucid}").body - rss = XML.parse_html(rss) - - ucid = rss.xpath_node("//feed/channelid") - if ucid - ucid = ucid.content - else - error_message = "User does not exist" - next templated "error" - end - - env.redirect "/channel/#{ucid}" - end - - url = produce_playlist_url(ucid, (page - 1) * 100) - response = client.get(url) - - json = JSON.parse(response.body) - if !json["content_html"]? || json["content_html"].as_s.empty? - error_message = "This channel does not exist or has no videos." - next templated "error" - end - - if json["content_html"].as_s.strip(" \n").empty? 
- rss = client.get("/feeds/videos.xml?channel_id=#{ucid}").body - rss = XML.parse_html(rss) - author = rss.xpath_node("//feed/author/name").not_nil!.content - - videos = [] of ChannelVideo - - next templated "channel" - end - - document = XML.parse_html(json["content_html"].as_s) - author = document.xpath_node(%q(//div[@class="pl-video-owner"]/a)).not_nil!.content - - videos = [] of ChannelVideo - document.xpath_nodes(%q(//a[contains(@class,"pl-video-title-link")])).each do |node| - href = URI.parse(node["href"]) - id = HTTP::Params.parse(href.query.not_nil!)["v"] - title = node.content - - videos << ChannelVideo.new(id, title, Time.now, Time.now, ucid, author) - end - - templated "channel" -end - -get "/channel/:ucid/videos" do |env| - ucid = env.params.url["ucid"] - params = env.request.query - - if !params || params.empty? - params = "" - else - params = "?#{params}" - end - - env.redirect "/channel/#{ucid}#{params}" -end - get "/api/manifest/dash/id/:id" do |env| env.response.headers.add("Access-Control-Allow-Origin", "*") env.response.content_type = "application/dash+xml" @@ -2877,12 +2790,6 @@ get "/api/manifest/dash/id/:id" do |env| manifest end -options "/videoplayback*" do |env| - env.response.headers["Access-Control-Allow-Origin"] = "*" - env.response.headers["Access-Control-Allow-Methods"] = "GET" - env.response.headers["Access-Control-Allow-Headers"] = "Content-Type, range" -end - get "/api/manifest/hls_variant/*" do |env| client = make_client(YT_URL) manifest = client.get(env.request.path) @@ -2934,6 +2841,12 @@ get "/api/manifest/hls_playlist/*" do |env| manifest end +options "/videoplayback*" do |env| + env.response.headers["Access-Control-Allow-Origin"] = "*" + env.response.headers["Access-Control-Allow-Methods"] = "GET" + env.response.headers["Access-Control-Allow-Headers"] = "Content-Type, range" +end + get "/videoplayback*" do |env| path = env.request.path if path != "/videoplayback" @@ -3007,27 +2920,6 @@ get "/videoplayback*" do |env| end end 
-get "/:id" do |env| - id = env.params.url["id"] - - if md = id.match(/[a-zA-Z0-9_-]{11}/) - params = [] of String - env.params.query.each do |k, v| - params << "#{k}=#{v}" - end - params = params.join("&") - - url = "/watch?v=#{id}" - if !params.empty? - url += "&#{params}" - end - - env.redirect url - else - env.response.status_code = 404 - end -end - error 404 do |env| error_message = "404 Page not found" templated "error" diff --git a/src/invidious/channels.cr b/src/invidious/channels.cr new file mode 100644 index 00000000..f6cdad76 --- /dev/null +++ b/src/invidious/channels.cr @@ -0,0 +1,132 @@ +class InvidiousChannel + add_mapping({ + id: String, + author: String, + updated: Time, + }) +end + +class ChannelVideo + add_mapping({ + id: String, + title: String, + published: Time, + updated: Time, + ucid: String, + author: String, + }) +end + +def get_channel(id, client, db, refresh = true, pull_all_videos = true) + if db.query_one?("SELECT EXISTS (SELECT true FROM channels WHERE id = $1)", id, as: Bool) + channel = db.query_one("SELECT * FROM channels WHERE id = $1", id, as: InvidiousChannel) + + if refresh && Time.now - channel.updated > 10.minutes + channel = fetch_channel(id, client, db, pull_all_videos) + channel_array = channel.to_a + args = arg_array(channel_array) + + db.exec("INSERT INTO channels VALUES (#{args}) \ + ON CONFLICT (id) DO UPDATE SET updated = $3", channel_array) + end + else + channel = fetch_channel(id, client, db, pull_all_videos) + args = arg_array(channel.to_a) + db.exec("INSERT INTO channels VALUES (#{args})", channel.to_a) + end + + return channel +end + +def fetch_channel(ucid, client, db, pull_all_videos = true) + rss = client.get("/feeds/videos.xml?channel_id=#{ucid}").body + rss = XML.parse_html(rss) + + author = rss.xpath_node(%q(//feed/title)) + if !author + raise "Deleted or invalid channel" + end + author = author.content + + if !pull_all_videos + rss.xpath_nodes("//feed/entry").each do |entry| + video_id = 
entry.xpath_node("videoid").not_nil!.content + title = entry.xpath_node("title").not_nil!.content + published = Time.parse(entry.xpath_node("published").not_nil!.content, "%FT%X%z", Time::Location.local) + updated = Time.parse(entry.xpath_node("updated").not_nil!.content, "%FT%X%z", Time::Location.local) + author = entry.xpath_node("author/name").not_nil!.content + ucid = entry.xpath_node("channelid").not_nil!.content + + video = ChannelVideo.new(video_id, title, published, Time.now, ucid, author) + + db.exec("UPDATE users SET notifications = notifications || $1 \ + WHERE updated < $2 AND $3 = ANY(subscriptions) AND $1 <> ALL(notifications)", video.id, video.published, ucid) + + video_array = video.to_a + args = arg_array(video_array) + db.exec("INSERT INTO channel_videos VALUES (#{args}) \ + ON CONFLICT (id) DO UPDATE SET title = $2, published = $3, \ + updated = $4, ucid = $5, author = $6", video_array) + end + else + videos = [] of ChannelVideo + page = 1 + + loop do + url = produce_videos_url(ucid, page) + response = client.get(url) + + json = JSON.parse(response.body) + content_html = json["content_html"].as_s + if content_html.empty? 
+ # If we don't get anything, move on + break + end + document = XML.parse_html(content_html) + + document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])).each do |item| + anchor = item.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a)) + if !anchor + raise "could not find anchor" + end + + title = anchor.content.strip + video_id = anchor["href"].lchop("/watch?v=") + + published = item.xpath_node(%q(.//div[@class="yt-lockup-meta"]/ul/li[1])) + if !published + # This happens on Youtube red videos, here we just skip them + next + end + published = published.content + published = decode_date(published) + + videos << ChannelVideo.new(video_id, title, published, Time.now, ucid, author) + end + + if document.xpath_nodes(%q(//li[contains(@class, "channels-content-item")])).size < 30 + break + end + + page += 1 + end + + video_ids = [] of String + videos.each do |video| + db.exec("UPDATE users SET notifications = notifications || $1 \ + WHERE updated < $2 AND $3 = ANY(subscriptions) AND $1 <> ALL(notifications)", video.id, video.published, ucid) + video_ids << video.id + + video_array = video.to_a + args = arg_array(video_array) + db.exec("INSERT INTO channel_videos VALUES (#{args}) ON CONFLICT (id) DO NOTHING", video_array) + end + + # When a video is deleted from a channel, we find and remove it here + db.exec("DELETE FROM channel_videos * WHERE NOT id = ANY ('{#{video_ids.map { |a| %("#{a}") }.join(",")}}') AND ucid = $1", ucid) + end + + channel = InvidiousChannel.new(ucid, author, Time.now) + + return channel +end diff --git a/src/invidious/comments.cr b/src/invidious/comments.cr new file mode 100644 index 00000000..d7531fbb --- /dev/null +++ b/src/invidious/comments.cr @@ -0,0 +1,247 @@ +class RedditThing + JSON.mapping({ + kind: String, + data: RedditComment | RedditLink | RedditMore | RedditListing, + }) +end + +class RedditComment + JSON.mapping({ + author: String, + body_html: String, + replies: RedditThing | String, + score: Int32, + 
depth: Int32, + }) +end + +class RedditLink + JSON.mapping({ + author: String, + score: Int32, + subreddit: String, + num_comments: Int32, + id: String, + permalink: String, + title: String, + }) +end + +class RedditMore + JSON.mapping({ + children: Array(String), + count: Int32, + depth: Int32, + }) +end + +class RedditListing + JSON.mapping({ + children: Array(RedditThing), + modhash: String, + }) +end + +def get_reddit_comments(id, client, headers) + query = "(url:3D#{id}%20OR%20url:#{id})%20(site:youtube.com%20OR%20site:youtu.be)" + search_results = client.get("/search.json?q=#{query}", headers) + + if search_results.status_code == 200 + search_results = RedditThing.from_json(search_results.body) + + thread = search_results.data.as(RedditListing).children.sort_by { |child| child.data.as(RedditLink).score }[-1] + thread = thread.data.as(RedditLink) + + result = client.get("/r/#{thread.subreddit}/comments/#{thread.id}.json?limit=100&sort=top", headers).body + result = Array(RedditThing).from_json(result) + elsif search_results.status_code == 302 + result = client.get(search_results.headers["Location"], headers).body + result = Array(RedditThing).from_json(result) + + thread = result[0].data.as(RedditListing).children[0].data.as(RedditLink) + else + raise "Got error code #{search_results.status_code}" + end + + comments = result[1].data.as(RedditListing).children + return comments, thread +end + +def template_youtube_comments(comments) + html = "" + + root = comments["comments"].as_a + root.each do |child| + if child["replies"]? + replies_html = <<-END_HTML + + END_HTML + end + + html += <<-END_HTML +
+
+

+ [ - ] + #{child["likeCount"]} + #{child["author"]} +

+
+ #{child["content"]} + #{replies_html} +
+
+
+ END_HTML + end + + if comments["continuation"]? + html += <<-END_HTML +
+
+

+ Load more +

+
+
+ END_HTML + end + + return html +end + +def template_reddit_comments(root) + html = "" + root.each do |child| + if child.data.is_a?(RedditComment) + child = child.data.as(RedditComment) + author = child.author + score = child.score + body_html = HTML.unescape(child.body_html) + + replies_html = "" + if child.replies.is_a?(RedditThing) + replies = child.replies.as(RedditThing) + replies_html = template_reddit_comments(replies.data.as(RedditListing).children) + end + + content = <<-END_HTML +

+ [ - ] + #{score} + #{author} +

+
+ #{body_html} + #{replies_html} +
+ END_HTML + + if child.depth > 0 + html += <<-END_HTML +
+
+
+
+ #{content} +
+
+ END_HTML + else + html += <<-END_HTML +
+
+ #{content} +
+
+ END_HTML + end + end + end + + return html +end + +def add_alt_links(html) + alt_links = [] of {String, String} + + # This is painful but likely the only way to accomplish this in Crystal, + # as Crystigiri and others are not able to insert XML Nodes into a document. + # The goal here is to use as little regex as possible + html.scan(/]*>([^<]+)<\/a>/) do |match| + anchor = XML.parse_html(match[0]) + anchor = anchor.xpath_node("//a").not_nil! + url = URI.parse(anchor["href"]) + + if ["www.youtube.com", "m.youtube.com"].includes?(url.host) + if url.path == "/redirect" + params = HTTP::Params.parse(url.query.not_nil!) + alt_url = params["q"]? + alt_url ||= "/" + else + alt_url = url.full_path + end + + alt_link = <<-END_HTML + + + + END_HTML + elsif url.host == "youtu.be" + alt_link = <<-END_HTML + + + + END_HTML + elsif url.to_s == "#" + length_seconds = decode_length_seconds(anchor.content) + alt_anchor = <<-END_HTML + #{anchor.content} + END_HTML + + html = html.sub(anchor.to_s, alt_anchor) + next + else + alt_link = "" + end + + alt_links << {anchor.to_s, alt_link} + end + + alt_links.each do |original, alternate| + html = html.sub(original, original + alternate) + end + + return html +end + +def fill_links(html, scheme, host) + html = XML.parse_html(html) + + html.xpath_nodes("//a").each do |match| + url = URI.parse(match["href"]) + # Reddit links don't have host + if !url.host && !match["href"].starts_with?("javascript") && !url.to_s.ends_with? 
"#" + url.scheme = scheme + url.host = host + match["href"] = url + end + end + + if host == "www.youtube.com" + html = html.xpath_node(%q(//p[@id="eow-description"])).not_nil!.to_xml + else + html = html.to_xml(options: XML::SaveOptions::NO_DECL) + end + + html +end diff --git a/src/invidious/helpers.cr b/src/invidious/helpers.cr deleted file mode 100644 index 13a27c78..00000000 --- a/src/invidious/helpers.cr +++ /dev/null @@ -1,1272 +0,0 @@ -macro add_mapping(mapping) - def initialize({{*mapping.keys.map { |id| "@#{id}".id }}}) - end - - def to_a - return [{{*mapping.keys.map { |id| "@#{id}".id }}}] - end - - DB.mapping({{mapping}}) -end - -macro templated(filename) - render "src/invidious/views/#{{{filename}}}.ecr", "src/invidious/views/layout.ecr" -end - -macro rendered(filename) - render "src/invidious/views/#{{{filename}}}.ecr" -end - -DEFAULT_USER_PREFERENCES = Preferences.from_json({ - "video_loop" => false, - "autoplay" => false, - "speed" => 1.0, - "quality" => "hd720", - "volume" => 100, - "comments" => "youtube", - "dark_mode" => false, - "thin_mode " => false, - "max_results" => 40, - "sort" => "published", - "latest_only" => false, - "unseen_only" => false, -}.to_json) - -class Config - YAML.mapping({ - crawl_threads: Int32, - channel_threads: Int32, - video_threads: Int32, - db: NamedTuple( - user: String, - password: String, - host: String, - port: Int32, - dbname: String, - ), - dl_api_key: String?, - https_only: Bool?, - hmac_key: String?, - }) -end - -class FilteredCompressHandler < Kemal::Handler - exclude ["/videoplayback", "/api/*"] - - def call(env) - return call_next env if exclude_match? 
env - - {% if flag?(:without_zlib) %} - call_next env - {% else %} - request_headers = env.request.headers - - if request_headers.includes_word?("Accept-Encoding", "gzip") - env.response.headers["Content-Encoding"] = "gzip" - env.response.output = Gzip::Writer.new(env.response.output, sync_close: true) - elsif request_headers.includes_word?("Accept-Encoding", "deflate") - env.response.headers["Content-Encoding"] = "deflate" - env.response.output = Flate::Writer.new(env.response.output, sync_close: true) - end - - call_next env - {% end %} - end -end - -class Video - module HTTPParamConverter - def self.from_rs(rs) - HTTP::Params.parse(rs.read(String)) - end - end - - add_mapping({ - id: String, - info: { - type: HTTP::Params, - default: HTTP::Params.parse(""), - converter: Video::HTTPParamConverter, - }, - updated: Time, - title: String, - views: Int64, - likes: Int32, - dislikes: Int32, - wilson_score: Float64, - published: Time, - description: String, - language: String?, - author: String, - ucid: String, - allowed_regions: Array(String), - is_family_friendly: Bool, - genre: String, - }) -end - -class InvidiousChannel - add_mapping({ - id: String, - author: String, - updated: Time, - }) -end - -class ChannelVideo - add_mapping({ - id: String, - title: String, - published: Time, - updated: Time, - ucid: String, - author: String, - }) -end - -class User - module PreferencesConverter - def self.from_rs(rs) - begin - Preferences.from_json(rs.read(String)) - rescue ex - DEFAULT_USER_PREFERENCES - end - end - end - - add_mapping({ - id: String, - updated: Time, - notifications: Array(String), - subscriptions: Array(String), - email: String, - preferences: { - type: Preferences, - default: DEFAULT_USER_PREFERENCES, - converter: PreferencesConverter, - }, - password: String?, - token: String, - watched: Array(String), - }) -end - -# TODO: Migrate preferences so this will not be nilable -class Preferences - JSON.mapping({ - video_loop: Bool, - autoplay: Bool, - speed: 
Float32, - quality: String, - volume: Int32, - comments: { - type: String, - nilable: true, - default: "youtube", - }, - redirect_feed: { - type: Bool, - nilable: true, - default: false, - }, - dark_mode: Bool, - thin_mode: { - type: Bool, - nilable: true, - default: false, - }, - max_results: Int32, - sort: String, - latest_only: Bool, - unseen_only: Bool, - notifications_only: { - type: Bool, - nilable: true, - default: false, - }, - }) -end - -class RedditThing - JSON.mapping({ - kind: String, - data: RedditComment | RedditLink | RedditMore | RedditListing, - }) -end - -class RedditComment - JSON.mapping({ - author: String, - body_html: String, - replies: RedditThing | String, - score: Int32, - depth: Int32, - }) -end - -class RedditLink - JSON.mapping({ - author: String, - score: Int32, - subreddit: String, - num_comments: Int32, - id: String, - permalink: String, - title: String, - }) -end - -class RedditMore - JSON.mapping({ - children: Array(String), - count: Int32, - depth: Int32, - }) -end - -class RedditListing - JSON.mapping({ - children: Array(RedditThing), - modhash: String, - }) -end - -# See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html -def ci_lower_bound(pos, n) - if n == 0 - return 0.0 - end - - # z value here represents a confidence level of 0.95 - z = 1.96 - phat = 1.0*pos/n - - return (phat + z*z/(2*n) - z * Math.sqrt((phat*(1 - phat) + z*z/(4*n))/n))/(1 + z*z/n) -end - -def elapsed_text(elapsed) - millis = elapsed.total_milliseconds - return "#{millis.round(2)}ms" if millis >= 1 - - "#{(millis * 1000).round(2)}µs" -end - -def fetch_video(id) - html_channel = Channel(XML::Node).new - info_channel = Channel(HTTP::Params).new - - spawn do - client = make_client(YT_URL) - html = client.get("/watch?v=#{id}&bpctr=#{Time.new.epoch + 2000}&disable_polymer=1") - html = XML.parse_html(html.body) - - html_channel.send(html) - end - - spawn do - client = make_client(YT_URL) - info = 
client.get("/get_video_info?video_id=#{id}&el=detailpage&ps=default&eurl=&gl=US&hl=en&disable_polymer=1") - info = HTTP::Params.parse(info.body) - - if info["reason"]? - info = client.get("/get_video_info?video_id=#{id}&ps=default&eurl=&gl=US&hl=en&disable_polymer=1") - info = HTTP::Params.parse(info.body) - end - - info_channel.send(info) - end - - html = html_channel.receive - info = info_channel.receive - - if info["reason"]? - raise info["reason"] - end - - title = info["title"] - views = info["view_count"].to_i64 - author = info["author"] - ucid = info["ucid"] - - likes = html.xpath_node(%q(//button[@title="I like this"]/span)) - likes = likes.try &.content.delete(",").try &.to_i - likes ||= 0 - - dislikes = html.xpath_node(%q(//button[@title="I dislike this"]/span)) - dislikes = dislikes.try &.content.delete(",").try &.to_i - dislikes ||= 0 - - description = html.xpath_node(%q(//p[@id="eow-description"])) - description = description ? description.to_xml : "" - - wilson_score = ci_lower_bound(likes, likes + dislikes) - - published = html.xpath_node(%q(//meta[@itemprop="datePublished"])).not_nil!["content"] - published = Time.parse(published, "%Y-%m-%d", Time::Location.local) - - allowed_regions = html.xpath_node(%q(//meta[@itemprop="regionsAllowed"])).not_nil!["content"].split(",") - is_family_friendly = html.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).not_nil!["content"] == "True" - genre = html.xpath_node(%q(//meta[@itemprop="genre"])).not_nil!["content"] - - video = Video.new(id, info, Time.now, title, views, likes, dislikes, wilson_score, published, description, - nil, author, ucid, allowed_regions, is_family_friendly, genre) - - return video -end - -def get_video(id, db, refresh = true) - if db.query_one?("SELECT EXISTS (SELECT true FROM videos WHERE id = $1)", id, as: Bool) - video = db.query_one("SELECT * FROM videos WHERE id = $1", id, as: Video) - - # If record was last updated over an hour ago, refresh (expire param in response lasts for 6 
hours) - if refresh && Time.now - video.updated > 1.hour - begin - video = fetch_video(id) - video_array = video.to_a - args = arg_array(video_array[1..-1], 2) - - db.exec("UPDATE videos SET (info,updated,title,views,likes,dislikes,wilson_score,\ - published,description,language,author,ucid, allowed_regions, is_family_friendly, genre)\ - = (#{args}) WHERE id = $1", video_array) - rescue ex - db.exec("DELETE FROM videos * WHERE id = $1", id) - raise ex - end - end - else - video = fetch_video(id) - video_array = video.to_a - args = arg_array(video_array) - - db.exec("INSERT INTO videos VALUES (#{args}) ON CONFLICT (id) DO NOTHING", video_array) - end - - return video -end - -def search(query, page = 1) - client = make_client(YT_URL) - html = client.get("/results?q=#{URI.escape(query)}&page=#{page}&sp=EgIQAVAU").body - html = XML.parse_html(html) - - videos = [] of ChannelVideo - - html.xpath_nodes(%q(//ol[@class="item-section"]/li)).each do |item| - root = item.xpath_node(%q(div[contains(@class,"yt-lockup-video")]/div)) - if !root - next - end - - id = root.xpath_node(%q(.//div[contains(@class,"yt-lockup-thumbnail")]/a/@href)).not_nil!.content.lchop("/watch?v=") - - title = root.xpath_node(%q(.//div[@class="yt-lockup-content"]/h3/a)).not_nil!.content - - author = root.xpath_node(%q(.//div[@class="yt-lockup-content"]/div/a)).not_nil! - ucid = author["href"].rpartition("/")[-1] - author = author.content - - published = root.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li[1])).not_nil!.content - published = decode_date(published) - - video = ChannelVideo.new(id, title, published, Time.now, ucid, author) - videos << video - end - - return videos -end - -def splice(a, b) - c = a[0] - a[0] = a[b % a.size] - a[b % a.size] = c - return a -end - -def decrypt_signature(a, code) - a = a.split("") - - code.each do |item| - case item[:name] - when "a" - a.reverse! 
- when "b" - a.delete_at(0..(item[:value] - 1)) - when "c" - a = splice(a, item[:value]) - end - end - - return a.join("") -end - -def update_decrypt_function(client) - # Video with signature - document = client.get("/watch?v=CvFH_6DNRCY").body - url = document.match(/src="(?\/yts\/jsbin\/player-.{9}\/en_US\/base.js)"/).not_nil!["url"] - player = client.get(url).body - - function_name = player.match(/\(b\|\|\(b="signature"\),d.set\(b,(?[a-zA-Z0-9]{2})\(c\)\)\)/).not_nil!["name"] - function_body = player.match(/#{function_name}=function\(a\){(?[^}]+)}/).not_nil!["body"] - function_body = function_body.split(";")[1..-2] - - var_name = function_body[0][0, 2] - - operations = {} of String => String - matches = player.delete("\n").match(/var #{var_name}={(?[a-zA-Z0-9]{2}:[^}]+}),(?[a-zA-Z0-9]{2}:[^}]+}),(?[a-zA-Z0-9]{2}:[^}]+})};/).not_nil! - 3.times do |i| - operation = matches["op#{i + 1}"] - op_name = operation[0, 2] - - op_body = operation.match(/\{[^}]+\}/).not_nil![0] - case op_body - when "{a.reverse()}" - operations[op_name] = "a" - when "{a.splice(0,b)}" - operations[op_name] = "b" - else - operations[op_name] = "c" - end - end - - decrypt_function = [] of {name: String, value: Int32} - function_body.each do |function| - function = function.lchop(var_name + ".") - op_name = function[0, 2] - - function = function.lchop(op_name + "(a,") - value = function.rchop(")").to_i - - decrypt_function << {name: operations[op_name], value: value} - end - - return decrypt_function -end - -def rank_videos(db, n, filter, url) - top = [] of {Float64, String} - - db.query("SELECT id, wilson_score, published FROM videos WHERE views > 5000 ORDER BY published DESC LIMIT 1000") do |rs| - rs.each do - id = rs.read(String) - wilson_score = rs.read(Float64) - published = rs.read(Time) - - # Exponential decay, older videos tend to rank lower - temperature = wilson_score * Math.exp(-0.000005*((Time.now - published).total_minutes)) - top << {temperature, id} - end - end - - top.sort! 
- - # Make hottest come first - top.reverse! - top = top.map { |a, b| b } - - if filter - language_list = [] of String - top.each do |id| - if language_list.size == n - break - else - client = make_client(url) - begin - video = get_video(id, db) - rescue ex - next - end - - if video.language - language = video.language - else - description = XML.parse(video.description) - content = [video.title, description.content].join(" ") - content = content[0, 10000] - - results = DetectLanguage.detect(content) - language = results[0].language - - db.exec("UPDATE videos SET language = $1 WHERE id = $2", language, id) - end - - if language == "en" - language_list << id - end - end - end - return language_list - else - return top[0..n - 1] - end -end - -def make_client(url) - context = OpenSSL::SSL::Context::Client.new - context.add_options( - OpenSSL::SSL::Options::ALL | - OpenSSL::SSL::Options::NO_SSL_V2 | - OpenSSL::SSL::Options::NO_SSL_V3 - ) - client = HTTP::Client.new(url, context) - client.read_timeout = 10.seconds - client.connect_timeout = 10.seconds - return client -end - -def get_reddit_comments(id, client, headers) - query = "(url:3D#{id}%20OR%20url:#{id})%20(site:youtube.com%20OR%20site:youtu.be)" - search_results = client.get("/search.json?q=#{query}", headers) - - if search_results.status_code == 200 - search_results = RedditThing.from_json(search_results.body) - - thread = search_results.data.as(RedditListing).children.sort_by { |child| child.data.as(RedditLink).score }[-1] - thread = thread.data.as(RedditLink) - - result = client.get("/r/#{thread.subreddit}/comments/#{thread.id}.json?limit=100&sort=top", headers).body - result = Array(RedditThing).from_json(result) - elsif search_results.status_code == 302 - result = client.get(search_results.headers["Location"], headers).body - result = Array(RedditThing).from_json(result) - - thread = result[0].data.as(RedditListing).children[0].data.as(RedditLink) - else - raise "Got error code #{search_results.status_code}" 
- end - - comments = result[1].data.as(RedditListing).children - return comments, thread -end - -def template_youtube_comments(comments) - html = "" - - root = comments["comments"].as_a - root.each do |child| - if child["replies"]? - replies_html = <<-END_HTML - - END_HTML - end - - html += <<-END_HTML -
-
-

- [ - ] - #{child["likeCount"]} - #{child["author"]} -

-
- #{child["content"]} - #{replies_html} -
-
-
- END_HTML - end - - if comments["continuation"]? - html += <<-END_HTML -
-
-

- Load more -

-
-
- END_HTML - end - - return html -end - -def template_reddit_comments(root) - html = "" - root.each do |child| - if child.data.is_a?(RedditComment) - child = child.data.as(RedditComment) - author = child.author - score = child.score - body_html = HTML.unescape(child.body_html) - - replies_html = "" - if child.replies.is_a?(RedditThing) - replies = child.replies.as(RedditThing) - replies_html = template_reddit_comments(replies.data.as(RedditListing).children) - end - - content = <<-END_HTML -

- [ - ] - #{score} - #{author} -

-
- #{body_html} - #{replies_html} -
- END_HTML - - if child.depth > 0 - html += <<-END_HTML -
-
-
-
- #{content} -
-
- END_HTML - else - html += <<-END_HTML -
-
- #{content} -
-
- END_HTML - end - end - end - - return html -end - -def number_with_separator(number) - number.to_s.reverse.gsub(/(\d{3})(?=\d)/, "\\1,").reverse -end - -def arg_array(array, start = 1) - if array.size == 0 - args = "NULL" - else - args = [] of String - (start..array.size + start - 1).each { |i| args << "($#{i})" } - args = args.join(",") - end - - return args -end - -def add_alt_links(html) - alt_links = [] of {String, String} - - # This is painful but likely the only way to accomplish this in Crystal, - # as Crystigiri and others are not able to insert XML Nodes into a document. - # The goal here is to use as little regex as possible - html.scan(/]*>([^<]+)<\/a>/) do |match| - anchor = XML.parse_html(match[0]) - anchor = anchor.xpath_node("//a").not_nil! - url = URI.parse(anchor["href"]) - - if ["www.youtube.com", "m.youtube.com"].includes?(url.host) - if url.path == "/redirect" - params = HTTP::Params.parse(url.query.not_nil!) - alt_url = params["q"]? - alt_url ||= "/" - else - alt_url = url.full_path - end - - alt_link = <<-END_HTML - - - - END_HTML - elsif url.host == "youtu.be" - alt_link = <<-END_HTML - - - - END_HTML - elsif url.to_s == "#" - length_seconds = decode_length_seconds(anchor.content) - alt_anchor = <<-END_HTML - #{anchor.content} - END_HTML - - html = html.sub(anchor.to_s, alt_anchor) - next - else - alt_link = "" - end - - alt_links << {anchor.to_s, alt_link} - end - - alt_links.each do |original, alternate| - html = html.sub(original, original + alternate) - end - - return html -end - -def fill_links(html, scheme, host) - html = XML.parse_html(html) - - html.xpath_nodes("//a").each do |match| - url = URI.parse(match["href"]) - # Reddit links don't have host - if !url.host && !match["href"].starts_with?("javascript") && !url.to_s.ends_with? 
"#" - url.scheme = scheme - url.host = host - match["href"] = url - end - end - - if host == "www.youtube.com" - html = html.xpath_node(%q(//p[@id="eow-description"])).not_nil!.to_xml - else - html = html.to_xml(options: XML::SaveOptions::NO_DECL) - end - - html -end - -def login_req(login_form, f_req) - data = { - "pstMsg" => "1", - "checkConnection" => "youtube", - "checkedDomains" => "youtube", - "hl" => "en", - "deviceinfo" => %q([null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]), - "f.req" => f_req, - "flowName" => "GlifWebSignIn", - "flowEntry" => "ServiceLogin", - } - - data = login_form.merge(data) - - return HTTP::Params.encode(data) -end - -def get_channel(id, client, db, refresh = true, pull_all_videos = true) - if db.query_one?("SELECT EXISTS (SELECT true FROM channels WHERE id = $1)", id, as: Bool) - channel = db.query_one("SELECT * FROM channels WHERE id = $1", id, as: InvidiousChannel) - - if refresh && Time.now - channel.updated > 10.minutes - channel = fetch_channel(id, client, db, pull_all_videos) - channel_array = channel.to_a - args = arg_array(channel_array) - - db.exec("INSERT INTO channels VALUES (#{args}) \ - ON CONFLICT (id) DO UPDATE SET updated = $3", channel_array) - end - else - channel = fetch_channel(id, client, db, pull_all_videos) - args = arg_array(channel.to_a) - db.exec("INSERT INTO channels VALUES (#{args})", channel.to_a) - end - - return channel -end - -def fetch_channel(ucid, client, db, pull_all_videos = true) - rss = client.get("/feeds/videos.xml?channel_id=#{ucid}").body - rss = XML.parse_html(rss) - - author = rss.xpath_node(%q(//feed/title)) - if !author - raise "Deleted or invalid channel" - end - author = author.content - - if !pull_all_videos - rss.xpath_nodes("//feed/entry").each do |entry| - video_id = entry.xpath_node("videoid").not_nil!.content - title = entry.xpath_node("title").not_nil!.content - published = Time.parse(entry.xpath_node("published").not_nil!.content, "%FT%X%z", 
Time::Location.local) - updated = Time.parse(entry.xpath_node("updated").not_nil!.content, "%FT%X%z", Time::Location.local) - author = entry.xpath_node("author/name").not_nil!.content - ucid = entry.xpath_node("channelid").not_nil!.content - - video = ChannelVideo.new(video_id, title, published, Time.now, ucid, author) - - db.exec("UPDATE users SET notifications = notifications || $1 \ - WHERE updated < $2 AND $3 = ANY(subscriptions) AND $1 <> ALL(notifications)", video.id, video.published, ucid) - - video_array = video.to_a - args = arg_array(video_array) - db.exec("INSERT INTO channel_videos VALUES (#{args}) \ - ON CONFLICT (id) DO UPDATE SET title = $2, published = $3, \ - updated = $4, ucid = $5, author = $6", video_array) - end - else - videos = [] of ChannelVideo - page = 1 - - loop do - url = produce_videos_url(ucid, page) - response = client.get(url) - - json = JSON.parse(response.body) - content_html = json["content_html"].as_s - if content_html.empty? - # If we don't get anything, move on - break - end - document = XML.parse_html(content_html) - - document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])).each do |item| - anchor = item.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a)) - if !anchor - raise "could not find anchor" - end - - title = anchor.content.strip - video_id = anchor["href"].lchop("/watch?v=") - - published = item.xpath_node(%q(.//div[@class="yt-lockup-meta"]/ul/li[1])) - if !published - # This happens on Youtube red videos, here we just skip them - next - end - published = published.content - published = decode_date(published) - - videos << ChannelVideo.new(video_id, title, published, Time.now, ucid, author) - end - - if document.xpath_nodes(%q(//li[contains(@class, "channels-content-item")])).size < 30 - break - end - - page += 1 - end - - video_ids = [] of String - videos.each do |video| - db.exec("UPDATE users SET notifications = notifications || $1 \ - WHERE updated < $2 AND $3 = ANY(subscriptions) AND $1 <> 
ALL(notifications)", video.id, video.published, ucid) - video_ids << video.id - - video_array = video.to_a - args = arg_array(video_array) - db.exec("INSERT INTO channel_videos VALUES (#{args}) ON CONFLICT (id) DO NOTHING", video_array) - end - - # When a video is deleted from a channel, we find and remove it here - db.exec("DELETE FROM channel_videos * WHERE NOT id = ANY ('{#{video_ids.map { |a| %("#{a}") }.join(",")}}') AND ucid = $1", ucid) - end - - channel = InvidiousChannel.new(ucid, author, Time.now) - - return channel -end - -def get_user(sid, client, headers, db, refresh = true) - if db.query_one?("SELECT EXISTS (SELECT true FROM users WHERE id = $1)", sid, as: Bool) - user = db.query_one("SELECT * FROM users WHERE id = $1", sid, as: User) - - if refresh && Time.now - user.updated > 1.minute - user = fetch_user(sid, client, headers, db) - user_array = user.to_a - - user_array[5] = user_array[5].to_json - args = arg_array(user_array) - - db.exec("INSERT INTO users VALUES (#{args}) \ - ON CONFLICT (email) DO UPDATE SET id = $1, updated = $2, subscriptions = $4", user_array) - end - else - user = fetch_user(sid, client, headers, db) - user_array = user.to_a - - user_array[5] = user_array[5].to_json - args = arg_array(user.to_a) - - db.exec("INSERT INTO users VALUES (#{args}) \ - ON CONFLICT (email) DO UPDATE SET id = $1, updated = $2, subscriptions = $4", user_array) - end - - return user -end - -def fetch_user(sid, client, headers, db) - feed = client.get("/subscription_manager?disable_polymer=1", headers) - feed = XML.parse_html(feed.body) - - channels = [] of String - feed.xpath_nodes(%q(//ul[@id="guide-channels"]/li/a)).each do |channel| - if !["Popular on YouTube", "Music", "Sports", "Gaming"].includes? 
channel["title"] - channel_id = channel["href"].lstrip("/channel/") - - begin - channel = get_channel(channel_id, client, db, false, false) - channels << channel.id - rescue ex - next - end - end - end - - email = feed.xpath_node(%q(//a[@class="yt-masthead-picker-header yt-masthead-picker-active-account"])) - if email - email = email.content.strip - else - email = "" - end - - token = Base64.urlsafe_encode(Random::Secure.random_bytes(32)) - - user = User.new(sid, Time.now, [] of String, channels, email, DEFAULT_USER_PREFERENCES, nil, token, [] of String) - return user -end - -def create_user(sid, email, password) - password = Crypto::Bcrypt::Password.create(password, cost: 10) - token = Base64.urlsafe_encode(Random::Secure.random_bytes(32)) - - user = User.new(sid, Time.now, [] of String, [] of String, email, DEFAULT_USER_PREFERENCES, password.to_s, token, [] of String) - - return user -end - -def decode_length_seconds(string) - length_seconds = string.split(":").map { |a| a.to_i } - length_seconds = [0] * (3 - length_seconds.size) + length_seconds - length_seconds = Time::Span.new(length_seconds[0], length_seconds[1], length_seconds[2]) - length_seconds = length_seconds.total_seconds.to_i - - return length_seconds -end - -def decode_time(string) - time = string.try &.to_f? - - if !time - hours = /(?\d+)h/.match(string).try &.["hours"].try &.to_f - hours ||= 0 - - minutes = /(?\d+)m(?!s)/.match(string).try &.["minutes"].try &.to_f - minutes ||= 0 - - seconds = /(?\d+)s/.match(string).try &.["seconds"].try &.to_f - seconds ||= 0 - - millis = /(?\d+)ms/.match(string).try &.["millis"].try &.to_f - millis ||= 0 - - time = hours * 3600 + minutes * 60 + seconds + millis / 1000 - end - - return time -end - -def decode_date(string : String) - # Time matches format "20 hours ago", "40 minutes ago"... - date = string.split(" ")[-3, 3] - delta = date[0].to_i - - case date[1] - when .includes? "minute" - delta = delta.minutes - when .includes? 
"hour" - delta = delta.hours - when .includes? "day" - delta = delta.days - when .includes? "week" - delta = delta.weeks - when .includes? "month" - delta = delta.months - when .includes? "year" - delta = delta.years - else - raise "Could not parse #{string}" - end - - return Time.now - delta -end - -def recode_date(time : Time) - span = Time.now - time - - if span.total_days > 365.0 - span = {span.total_days / 365, "year"} - elsif span.total_days > 30.0 - span = {span.total_days / 30, "month"} - elsif span.total_days > 7.0 - span = {span.total_days / 7, "week"} - elsif span.total_hours > 24.0 - span = {span.total_days, "day"} - elsif span.total_minutes > 60.0 - span = {span.total_hours, "hour"} - else - span = {0, "units"} - end - - span = {span[0].to_i, span[1]} - if span[0] > 1 - span = {span[0], span[1] + "s"} - end - - return span.join(" ") -end - -def produce_playlist_url(ucid, index) - ucid = ucid.lchop("UC") - ucid = "VLUU" + ucid - - continuation = write_var_int(index) - continuation.unshift(0x08_u8) - slice = continuation.to_unsafe.to_slice(continuation.size) - - continuation = Base64.urlsafe_encode(slice, false) - continuation = "PT:" + continuation - continuation = continuation.bytes - continuation.unshift(0x7a_u8, continuation.size.to_u8) - - slice = continuation.to_unsafe.to_slice(continuation.size) - continuation = Base64.urlsafe_encode(slice) - continuation = URI.escape(continuation) - continuation = continuation.bytes - continuation.unshift(continuation.size.to_u8) - - continuation.unshift(ucid.size.to_u8) - continuation = ucid.bytes + continuation - continuation.unshift(0x12.to_u8, ucid.size.to_u8) - continuation.unshift(0xe2_u8, 0xa9_u8, 0x85_u8, 0xb2_u8, 2_u8, continuation.size.to_u8) - - slice = continuation.to_unsafe.to_slice(continuation.size) - continuation = Base64.urlsafe_encode(slice) - continuation = URI.escape(continuation) - - url = "/browse_ajax?action_continuation=1&continuation=#{continuation}" - - return url -end - -def 
produce_videos_url(ucid, page = 1) - page = "#{page}" - - meta = "\x12\x06videos \x00\x30\x02\x38\x01\x60\x01\x6a\x00\x7a" - meta += page.size.to_u8.unsafe_chr - meta += page - meta += "\xb8\x01\x00" - - meta = Base64.urlsafe_encode(meta) - meta = URI.escape(meta) - - continuation = "\x12" - continuation += ucid.size.to_u8.unsafe_chr - continuation += ucid - continuation += "\x1a" - continuation += meta.size.to_u8.unsafe_chr - continuation += meta - - continuation = continuation.size.to_u8.unsafe_chr + continuation - continuation = "\xe2\xa9\x85\xb2\x02" + continuation - - continuation = Base64.urlsafe_encode(continuation) - continuation = URI.escape(continuation) - - url = "/browse_ajax?continuation=#{continuation}" - - return url -end - -def read_var_int(bytes) - numRead = 0 - result = 0 - - read = bytes[numRead] - - if bytes.size == 1 - result = bytes[0].to_i32 - else - while ((read & 0b10000000) != 0) - read = bytes[numRead].to_u64 - value = (read & 0b01111111) - result |= (value << (7 * numRead)) - - numRead += 1 - if numRead > 5 - raise "VarInt is too big" - end - end - end - - return result -end - -def write_var_int(value : Int) - bytes = [] of UInt8 - value = value.to_u32 - - if value == 0 - bytes = [0_u8] - else - while value != 0 - temp = (value & 0b01111111).to_u8 - value = value >> 7 - - if value != 0 - temp |= 0b10000000 - end - - bytes << temp - end - end - - return bytes -end - -def generate_captcha(key) - minute = Random::Secure.rand(12) - minute_angle = minute * 30 - minute = minute * 5 - - hour = Random::Secure.rand(12) - hour_angle = hour * 30 + minute_angle.to_f / 12 - if hour == 0 - hour = 12 - end - - clock_svg = <<-END_SVG - - - - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - - - - - - END_SVG - - challenge = "" - convert = Process.run(%(convert -density 1200 -resize 400x400 -background none svg:- png:-), shell: true, - input: IO::Memory.new(clock_svg), output: Process::Redirect::Pipe) do |proc| - challenge = proc.output.gets_to_end - 
challenge = Base64.strict_encode(challenge) - challenge = "data:image/png;base64,#{challenge}" - end - - answer = "#{hour}:#{minute.to_s.rjust(2, '0')}" - token = OpenSSL::HMAC.digest(:sha256, key, answer) - token = Base64.encode(token) - - return {challenge: challenge, token: token} -end - -def itag_to_metadata(itag : String) - # See https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/youtube.py#L380-#L476 - formats = {"5" => {"ext" => "flv", "width" => 400, "height" => 240, "acodec" => "mp3", "abr" => 64, "vcodec" => "h263"}, - "6" => {"ext" => "flv", "width" => 450, "height" => 270, "acodec" => "mp3", "abr" => 64, "vcodec" => "h263"}, - "13" => {"ext" => "3gp", "acodec" => "aac", "vcodec" => "mp4v"}, - "17" => {"ext" => "3gp", "width" => 176, "height" => 144, "acodec" => "aac", "abr" => 24, "vcodec" => "mp4v"}, - "18" => {"ext" => "mp4", "width" => 640, "height" => 360, "acodec" => "aac", "abr" => 96, "vcodec" => "h264"}, - "22" => {"ext" => "mp4", "width" => 1280, "height" => 720, "acodec" => "aac", "abr" => 192, "vcodec" => "h264"}, - "34" => {"ext" => "flv", "width" => 640, "height" => 360, "acodec" => "aac", "abr" => 128, "vcodec" => "h264"}, - "35" => {"ext" => "flv", "width" => 854, "height" => 480, "acodec" => "aac", "abr" => 128, "vcodec" => "h264"}, - - "36" => {"ext" => "3gp", "width" => 320, "acodec" => "aac", "vcodec" => "mp4v"}, - "37" => {"ext" => "mp4", "width" => 1920, "height" => 1080, "acodec" => "aac", "abr" => 192, "vcodec" => "h264"}, - "38" => {"ext" => "mp4", "width" => 4096, "height" => 3072, "acodec" => "aac", "abr" => 192, "vcodec" => "h264"}, - "43" => {"ext" => "webm", "width" => 640, "height" => 360, "acodec" => "vorbis", "abr" => 128, "vcodec" => "vp8"}, - "44" => {"ext" => "webm", "width" => 854, "height" => 480, "acodec" => "vorbis", "abr" => 128, "vcodec" => "vp8"}, - "45" => {"ext" => "webm", "width" => 1280, "height" => 720, "acodec" => "vorbis", "abr" => 192, "vcodec" => "vp8"}, - "46" => {"ext" => "webm", 
"width" => 1920, "height" => 1080, "acodec" => "vorbis", "abr" => 192, "vcodec" => "vp8"}, - "59" => {"ext" => "mp4", "width" => 854, "height" => 480, "acodec" => "aac", "abr" => 128, "vcodec" => "h264"}, - "78" => {"ext" => "mp4", "width" => 854, "height" => 480, "acodec" => "aac", "abr" => 128, "vcodec" => "h264"}, - - # 3D videos - "82" => {"ext" => "mp4", "height" => 360, "format" => "3D", "acodec" => "aac", "abr" => 128, "vcodec" => "h264"}, - "83" => {"ext" => "mp4", "height" => 480, "format" => "3D", "acodec" => "aac", "abr" => 128, "vcodec" => "h264"}, - "84" => {"ext" => "mp4", "height" => 720, "format" => "3D", "acodec" => "aac", "abr" => 192, "vcodec" => "h264"}, - "85" => {"ext" => "mp4", "height" => 1080, "format" => "3D", "acodec" => "aac", "abr" => 192, "vcodec" => "h264"}, - "100" => {"ext" => "webm", "height" => 360, "format" => "3D", "acodec" => "vorbis", "abr" => 128, "vcodec" => "vp8"}, - "101" => {"ext" => "webm", "height" => 480, "format" => "3D", "acodec" => "vorbis", "abr" => 192, "vcodec" => "vp8"}, - "102" => {"ext" => "webm", "height" => 720, "format" => "3D", "acodec" => "vorbis", "abr" => 192, "vcodec" => "vp8"}, - - # Apple HTTP Live Streaming - "91" => {"ext" => "mp4", "height" => 144, "format" => "HLS", "acodec" => "aac", "abr" => 48, "vcodec" => "h264"}, - "92" => {"ext" => "mp4", "height" => 240, "format" => "HLS", "acodec" => "aac", "abr" => 48, "vcodec" => "h264"}, - "93" => {"ext" => "mp4", "height" => 360, "format" => "HLS", "acodec" => "aac", "abr" => 128, "vcodec" => "h264"}, - "94" => {"ext" => "mp4", "height" => 480, "format" => "HLS", "acodec" => "aac", "abr" => 128, "vcodec" => "h264"}, - "95" => {"ext" => "mp4", "height" => 720, "format" => "HLS", "acodec" => "aac", "abr" => 256, "vcodec" => "h264"}, - "96" => {"ext" => "mp4", "height" => 1080, "format" => "HLS", "acodec" => "aac", "abr" => 256, "vcodec" => "h264"}, - "132" => {"ext" => "mp4", "height" => 240, "format" => "HLS", "acodec" => "aac", "abr" => 48, "vcodec" 
=> "h264"}, - "151" => {"ext" => "mp4", "height" => 72, "format" => "HLS", "acodec" => "aac", "abr" => 24, "vcodec" => "h264"}, - - # DASH mp4 video - "133" => {"ext" => "mp4", "height" => 240, "format" => "DASH video", "vcodec" => "h264"}, - "134" => {"ext" => "mp4", "height" => 360, "format" => "DASH video", "vcodec" => "h264"}, - "135" => {"ext" => "mp4", "height" => 480, "format" => "DASH video", "vcodec" => "h264"}, - "136" => {"ext" => "mp4", "height" => 720, "format" => "DASH video", "vcodec" => "h264"}, - "137" => {"ext" => "mp4", "height" => 1080, "format" => "DASH video", "vcodec" => "h264"}, - "138" => {"ext" => "mp4", "format" => "DASH video", "vcodec" => "h264"}, # Height can vary (https=>//github.com/rg3/youtube-dl/issues/4559) - "160" => {"ext" => "mp4", "height" => 144, "format" => "DASH video", "vcodec" => "h264"}, - "212" => {"ext" => "mp4", "height" => 480, "format" => "DASH video", "vcodec" => "h264"}, - "264" => {"ext" => "mp4", "height" => 1440, "format" => "DASH video", "vcodec" => "h264"}, - "298" => {"ext" => "mp4", "height" => 720, "format" => "DASH video", "vcodec" => "h264", "fps" => 60}, - "299" => {"ext" => "mp4", "height" => 1080, "format" => "DASH video", "vcodec" => "h264", "fps" => 60}, - "266" => {"ext" => "mp4", "height" => 2160, "format" => "DASH video", "vcodec" => "h264"}, - - # Dash mp4 audio - "139" => {"ext" => "m4a", "format" => "DASH audio", "acodec" => "aac", "abr" => 48, "container" => "m4a_dash"}, - "140" => {"ext" => "m4a", "format" => "DASH audio", "acodec" => "aac", "abr" => 128, "container" => "m4a_dash"}, - "141" => {"ext" => "m4a", "format" => "DASH audio", "acodec" => "aac", "abr" => 256, "container" => "m4a_dash"}, - "256" => {"ext" => "m4a", "format" => "DASH audio", "acodec" => "aac", "container" => "m4a_dash"}, - "258" => {"ext" => "m4a", "format" => "DASH audio", "acodec" => "aac", "container" => "m4a_dash"}, - "325" => {"ext" => "m4a", "format" => "DASH audio", "acodec" => "dtse", "container" => 
"m4a_dash"}, - "328" => {"ext" => "m4a", "format" => "DASH audio", "acodec" => "ec-3", "container" => "m4a_dash"}, - - # Dash webm - "167" => {"ext" => "webm", "height" => 360, "width" => 640, "format" => "DASH video", "container" => "webm", "vcodec" => "vp8"}, - "168" => {"ext" => "webm", "height" => 480, "width" => 854, "format" => "DASH video", "container" => "webm", "vcodec" => "vp8"}, - "169" => {"ext" => "webm", "height" => 720, "width" => 1280, "format" => "DASH video", "container" => "webm", "vcodec" => "vp8"}, - "170" => {"ext" => "webm", "height" => 1080, "width" => 1920, "format" => "DASH video", "container" => "webm", "vcodec" => "vp8"}, - "218" => {"ext" => "webm", "height" => 480, "width" => 854, "format" => "DASH video", "container" => "webm", "vcodec" => "vp8"}, - "219" => {"ext" => "webm", "height" => 480, "width" => 854, "format" => "DASH video", "container" => "webm", "vcodec" => "vp8"}, - "278" => {"ext" => "webm", "height" => 144, "format" => "DASH video", "container" => "webm", "vcodec" => "vp9"}, - "242" => {"ext" => "webm", "height" => 240, "format" => "DASH video", "vcodec" => "vp9"}, - "243" => {"ext" => "webm", "height" => 360, "format" => "DASH video", "vcodec" => "vp9"}, - "244" => {"ext" => "webm", "height" => 480, "format" => "DASH video", "vcodec" => "vp9"}, - "245" => {"ext" => "webm", "height" => 480, "format" => "DASH video", "vcodec" => "vp9"}, - "246" => {"ext" => "webm", "height" => 480, "format" => "DASH video", "vcodec" => "vp9"}, - "247" => {"ext" => "webm", "height" => 720, "format" => "DASH video", "vcodec" => "vp9"}, - "248" => {"ext" => "webm", "height" => 1080, "format" => "DASH video", "vcodec" => "vp9"}, - "271" => {"ext" => "webm", "height" => 1440, "format" => "DASH video", "vcodec" => "vp9"}, - # itag 272 videos are either 3840x2160 (e.g. 
RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug) - "272" => {"ext" => "webm", "height" => 2160, "format" => "DASH video", "vcodec" => "vp9"}, - "302" => {"ext" => "webm", "height" => 720, "format" => "DASH video", "vcodec" => "vp9", "fps" => 60}, - "303" => {"ext" => "webm", "height" => 1080, "format" => "DASH video", "vcodec" => "vp9", "fps" => 60}, - "308" => {"ext" => "webm", "height" => 1440, "format" => "DASH video", "vcodec" => "vp9", "fps" => 60}, - "313" => {"ext" => "webm", "height" => 2160, "format" => "DASH video", "vcodec" => "vp9"}, - "315" => {"ext" => "webm", "height" => 2160, "format" => "DASH video", "vcodec" => "vp9", "fps" => 60}, - - # Dash webm audio - "171" => {"ext" => "webm", "acodec" => "vorbis", "format" => "DASH audio", "abr" => 128}, - "172" => {"ext" => "webm", "acodec" => "vorbis", "format" => "DASH audio", "abr" => 256}, - - # Dash webm audio with opus inside - "249" => {"ext" => "webm", "format" => "DASH audio", "acodec" => "opus", "abr" => 50}, - "250" => {"ext" => "webm", "format" => "DASH audio", "acodec" => "opus", "abr" => 70}, - "251" => {"ext" => "webm", "format" => "DASH audio", "acodec" => "opus", "abr" => 160}, - } - - return formats[itag] -end diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr new file mode 100644 index 00000000..fcc191f6 --- /dev/null +++ b/src/invidious/helpers/helpers.cr @@ -0,0 +1,273 @@ +class Config + YAML.mapping({ + crawl_threads: Int32, + channel_threads: Int32, + video_threads: Int32, + db: NamedTuple( + user: String, + password: String, + host: String, + port: Int32, + dbname: String, + ), + dl_api_key: String?, + https_only: Bool?, + hmac_key: String?, + }) +end + +class FilteredCompressHandler < Kemal::Handler + exclude ["/videoplayback", "/api/*"] + + def call(env) + return call_next env if exclude_match? 
env + + {% if flag?(:without_zlib) %} + call_next env + {% else %} + request_headers = env.request.headers + + if request_headers.includes_word?("Accept-Encoding", "gzip") + env.response.headers["Content-Encoding"] = "gzip" + env.response.output = Gzip::Writer.new(env.response.output, sync_close: true) + elsif request_headers.includes_word?("Accept-Encoding", "deflate") + env.response.headers["Content-Encoding"] = "deflate" + env.response.output = Flate::Writer.new(env.response.output, sync_close: true) + end + + call_next env + {% end %} + end +end + +def rank_videos(db, n, filter, url) + top = [] of {Float64, String} + + db.query("SELECT id, wilson_score, published FROM videos WHERE views > 5000 ORDER BY published DESC LIMIT 1000") do |rs| + rs.each do + id = rs.read(String) + wilson_score = rs.read(Float64) + published = rs.read(Time) + + # Exponential decay, older videos tend to rank lower + temperature = wilson_score * Math.exp(-0.000005*((Time.now - published).total_minutes)) + top << {temperature, id} + end + end + + top.sort! + + # Make hottest come first + top.reverse! 
+ top = top.map { |a, b| b } + + if filter + language_list = [] of String + top.each do |id| + if language_list.size == n + break + else + client = make_client(url) + begin + video = get_video(id, db) + rescue ex + next + end + + if video.language + language = video.language + else + description = XML.parse(video.description) + content = [video.title, description.content].join(" ") + content = content[0, 10000] + + results = DetectLanguage.detect(content) + language = results[0].language + + db.exec("UPDATE videos SET language = $1 WHERE id = $2", language, id) + end + + if language == "en" + language_list << id + end + end + end + return language_list + else + return top[0..n - 1] + end +end + +def login_req(login_form, f_req) + data = { + "pstMsg" => "1", + "checkConnection" => "youtube", + "checkedDomains" => "youtube", + "hl" => "en", + "deviceinfo" => %q([null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]), + "f.req" => f_req, + "flowName" => "GlifWebSignIn", + "flowEntry" => "ServiceLogin", + } + + data = login_form.merge(data) + + return HTTP::Params.encode(data) +end + +def produce_playlist_url(ucid, index) + ucid = ucid.lchop("UC") + ucid = "VLUU" + ucid + + continuation = write_var_int(index) + continuation.unshift(0x08_u8) + slice = continuation.to_unsafe.to_slice(continuation.size) + + continuation = Base64.urlsafe_encode(slice, false) + continuation = "PT:" + continuation + continuation = continuation.bytes + continuation.unshift(0x7a_u8, continuation.size.to_u8) + + slice = continuation.to_unsafe.to_slice(continuation.size) + continuation = Base64.urlsafe_encode(slice) + continuation = URI.escape(continuation) + continuation = continuation.bytes + continuation.unshift(continuation.size.to_u8) + + continuation.unshift(ucid.size.to_u8) + continuation = ucid.bytes + continuation + continuation.unshift(0x12.to_u8, ucid.size.to_u8) + continuation.unshift(0xe2_u8, 0xa9_u8, 0x85_u8, 0xb2_u8, 2_u8, continuation.size.to_u8) + + slice 
# Decodes a little-endian base-128 varint from the start of `bytes`.
#
# Each byte contributes its low 7 bits; a set high bit means another byte
# follows. Decoding stops at the first byte whose high bit is clear, so any
# trailing data after the varint is ignored.
#
# A one-element input is returned verbatim (including its high bit); this
# shortcut is kept from the previous implementation for existing callers.
#
# Raises "VarInt is too big" once more than five bytes have been consumed,
# and `IndexError` when the input is empty or ends mid-varint.
def read_var_int(bytes)
  return bytes[0].to_i32 if bytes.size == 1

  result = 0
  num_read = 0

  # The previous version only entered its loop when the first byte had the
  # continuation bit set, so inputs such as [5, 9] decoded to 0 instead of 5.
  loop do
    read = bytes[num_read].to_u64
    result |= (read & 0b01111111) << (7 * num_read)

    num_read += 1
    raise "VarInt is too big" if num_read > 5

    break if (read & 0b10000000) == 0
  end

  return result
end

# Encodes `value` as a little-endian base-128 varint.
#
# `value` is converted to an unsigned 32-bit integer first. Returns the
# encoded bytes, least-significant group first; zero encodes as a single
# 0 byte.
def write_var_int(value : Int)
  remaining = value.to_u32
  return [0_u8] if remaining == 0

  bytes = [] of UInt8
  while remaining != 0
    group = (remaining & 0b01111111).to_u8
    remaining = remaining >> 7

    # Flag every group except the last one with the continuation bit.
    group |= 0b10000000 if remaining != 0

    bytes << group
  end

  return bytes
end
# Lower bound of the Wilson score confidence interval for `pos` positive
# ratings out of `n` total ratings.
# See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
#
# Returns 0.0 when there are no ratings at all.
def ci_lower_bound(pos, n)
  return 0.0 if n == 0

  # z value here represents a confidence level of 0.95
  z = 1.96
  phat = 1.0*pos/n

  centre = phat + z*z/(2*n)
  margin = z * Math.sqrt((phat*(1 - phat) + z*z/(4*n))/n)

  return (centre - margin)/(1 + z*z/n)
end

# Formats a time span for display: milliseconds when the span is at least
# one millisecond long, microseconds otherwise. Both are rounded to two
# decimal places.
def elapsed_text(elapsed)
  millis = elapsed.total_milliseconds

  if millis >= 1
    "#{millis.round(2)}ms"
  else
    "#{(millis * 1000).round(2)}µs"
  end
end
# Converts a clock-style duration ("SS", "MM:SS" or "HH:MM:SS") into a
# whole number of seconds.
#
# Missing leading fields are treated as zero. More than three fields is not
# supported and raises.
def decode_length_seconds(string)
  fields = string.split(":").map { |a| a.to_i }
  # Left-pad with zeros so the fields always read hours, minutes, seconds.
  fields = [0] * (3 - fields.size) + fields

  length = Time::Span.new(fields[0], fields[1], fields[2])

  return length.total_seconds.to_i
end

# Parses a timestamp parameter into seconds.
#
# Accepts either a plain number ("90", "12.5") or a unit-suffixed form such
# as "1h2m3s" or "500ms"; units that are absent count as zero. Returns the
# number of seconds as a numeric value.
def decode_time(string)
  time = string.try &.to_f?

  if !time
    # The named groups are required: each value is looked up by name below.
    hours = /(?<hours>\d+)h/.match(string).try &.["hours"].try &.to_f
    hours ||= 0

    # The negative lookahead keeps "ms" from being read as minutes.
    minutes = /(?<minutes>\d+)m(?!s)/.match(string).try &.["minutes"].try &.to_f
    minutes ||= 0

    seconds = /(?<seconds>\d+)s/.match(string).try &.["seconds"].try &.to_f
    seconds ||= 0

    millis = /(?<millis>\d+)ms/.match(string).try &.["millis"].try &.to_f
    millis ||= 0

    time = hours * 3600 + minutes * 60 + seconds + millis / 1000
  end

  return time
end
# Renders how long ago `time` was as a coarse "<count> <unit>" string, e.g.
# "3 hours" or "2 years", using the largest unit that fits.
#
# Ages under one hour used to collapse to the meaningless "0 units"; they
# are now reported in minutes or seconds. The count is truncated, and the
# unit is pluralised for every count except 1.
def recode_date(time : Time)
  elapsed = Time.now - time

  count_and_unit =
    if elapsed.total_days > 365.0
      {elapsed.total_days / 365, "year"}
    elsif elapsed.total_days > 30.0
      {elapsed.total_days / 30, "month"}
    elsif elapsed.total_days > 7.0
      {elapsed.total_days / 7, "week"}
    elsif elapsed.total_hours > 24.0
      {elapsed.total_days, "day"}
    elsif elapsed.total_minutes > 60.0
      {elapsed.total_hours, "hour"}
    elsif elapsed.total_seconds > 60.0
      {elapsed.total_minutes, "minute"}
    else
      {elapsed.total_seconds, "second"}
    end

  count = count_and_unit[0].to_i
  unit = count_and_unit[1]
  unit += "s" if count != 1

  return "#{count} #{unit}"
end

# Formats an integer with a comma between each group of three digits,
# e.g. 1234567 -> "1,234,567".
def number_with_separator(number)
  # Work on the reversed digits so groups are counted from the right.
  number.to_s.reverse.gsub(/(\d{3})(?=\d)/, "\\1,").reverse
end

# Builds the positional-placeholder list for a SQL VALUES clause with one
# placeholder per element of `array`, numbered from `start`:
# "($1),($2),...". Returns "NULL" for an empty array.
def arg_array(array, start = 1)
  return "NULL" if array.size == 0

  return (start...start + array.size).map { |i| "($#{i})" }.join(",")
end
# Background job: repeatedly ranks the stored videos and yields the current
# top list to the given block as an array of `Video`.
#
# When `config.dl_api_key` is set, DetectLanguage is configured with it and
# the ranking is asked to apply its language filter. Runs forever; failures
# while ranking or while loading an individual video are skipped and the
# loop carries on.
def pull_top_videos(config, db)
  if config.dl_api_key
    DetectLanguage.configure do |dl_config|
      dl_config.api_key = config.dl_api_key.not_nil!
    end
    filter = true
  end

  filter ||= false

  loop do
    begin
      top = rank_videos(db, 40, filter, YT_URL)
    rescue ex
      # Give other fibers a turn before retrying instead of looping straight
      # back into the failing call.
      Fiber.yield
      next
    end

    if top.empty?
      Fiber.yield
      next
    end

    videos = [] of Video

    top.each do |id|
      begin
        videos << get_video(id, db)
      rescue ex
        # A video that can no longer be loaded is simply left out.
        next
      end
    end

    yield videos
    Fiber.yield
  end
end
# Downloads the player script referenced by the watch page of `id` and
# extracts the list of signature-scrambling steps from it.
#
# `client` must respond to `get(path)` with an object exposing `body`.
#
# Returns an array of `{name:, value:}` steps, in the order the player
# applies them, where `name` is:
#   "a" - the helper body is `{a.reverse()}`
#   "b" - the helper body is `{a.splice(0,b)}`
#   "c" - any other helper body
# and `value` is the integer argument passed to that helper.
#
# Raises (via `not_nil!`) when any of the expected patterns is not found.
def fetch_decrypt_function(client, id = "CvFH_6DNRCY")
  document = client.get("/watch?v=#{id}").body
  url = document.match(/src="(?<url>\/yts\/jsbin\/player-.{9}\/en_US\/base.js)"/).not_nil!["url"]
  player = client.get(url).body

  # Name of the two-character function that transforms the signature.
  function_name = player.match(/\(b\|\|\(b="signature"\),d.set\(b,(?<name>[a-zA-Z0-9]{2})\(c\)\)\)/).not_nil!["name"]
  function_body = player.match(/#{function_name}=function\(a\){(?<body>[^}]+)}/).not_nil!["body"]
  # Drop the first and last statements, keeping only the helper calls.
  function_body = function_body.split(";")[1..-2]

  # Every remaining statement is a call on the same two-character helper object.
  var_name = function_body[0][0, 2]

  operations = {} of String => String
  matches = player.delete("\n").match(/var #{var_name}={(?<op1>[a-zA-Z0-9]{2}:[^}]+}),(?<op2>[a-zA-Z0-9]{2}:[^}]+}),(?<op3>[a-zA-Z0-9]{2}:[^}]+})};/).not_nil!
  3.times do |i|
    operation = matches["op#{i + 1}"]
    op_name = operation[0, 2]

    op_body = operation.match(/\{[^}]+\}/).not_nil![0]
    case op_body
    when "{a.reverse()}"
      operations[op_name] = "a"
    when "{a.splice(0,b)}"
      operations[op_name] = "b"
    else
      operations[op_name] = "c"
    end
  end

  decrypt_function = [] of {name: String, value: Int32}
  function_body.each do |function|
    # Each statement is stripped down from "<var>.<op>(a,<n>)" to "<n>".
    function = function.lchop(var_name + ".")
    op_name = function[0, 2]

    function = function.lchop(op_name + "(a,")
    value = function.rchop(")").to_i

    decrypt_function << {name: operations[op_name], value: value}
  end

  return decrypt_function
end
# Preferences given to newly created users, and the fallback used when a
# stored preferences blob cannot be parsed.
DEFAULT_USER_PREFERENCES = Preferences.from_json({
  "video_loop"  => false,
  "autoplay"    => false,
  "speed"       => 1.0,
  "quality"     => "hd720",
  "volume"      => 100,
  "comments"    => "youtube",
  "dark_mode"   => false,
  # This key previously had a trailing space ("thin_mode "), so it never
  # matched the `thin_mode` field and only the field default applied.
  "thin_mode"   => false,
  "max_results" => 40,
  "sort"        => "published",
  "latest_only" => false,
  "unseen_only" => false,
}.to_json)

# Per-user display and playback settings, stored as JSON.
#
# Fields marked `nilable` carry a default, so a blob that does not contain
# them still parses.
# TODO: Migrate preferences so fields will not be nilable
class Preferences
  JSON.mapping({
    video_loop: Bool,
    autoplay:   Bool,
    speed:      Float32,
    quality:    String,
    volume:     Int32,
    comments:   {
      type:    String,
      nilable: true,
      default: "youtube",
    },
    redirect_feed: {
      type:    Bool,
      nilable: true,
      default: false,
    },
    dark_mode: Bool,
    thin_mode: {
      type:    Bool,
      nilable: true,
      default: false,
    },
    max_results:        Int32,
    sort:               String,
    latest_only:        Bool,
    unseen_only:        Bool,
    notifications_only: {
      type:    Bool,
      nilable: true,
      default: false,
    },
  })
end
# Builds a fresh `User` for session `sid` from the account's subscription
# manager page.
#
# `client` and `headers` are used to request the page. Every channel link in
# the guide list is resolved through `get_channel` and collected as a
# subscription, except the four built-in entries ("Popular on YouTube",
# "Music", "Sports", "Gaming"). Channels that fail to resolve are skipped.
#
# The returned user has default preferences, no password, a newly generated
# random token, and an empty e-mail when the account header is not present
# on the page.
def fetch_user(sid, client, headers, db)
  feed = client.get("/subscription_manager?disable_polymer=1", headers)
  feed = XML.parse_html(feed.body)

  channels = [] of String
  feed.xpath_nodes(%q(//ul[@id="guide-channels"]/li/a)).each do |link|
    next if ["Popular on YouTube", "Music", "Sports", "Gaming"].includes? link["title"]

    # lchop removes the literal prefix. The previous lstrip("/channel/")
    # stripped any leading run of the characters '/', 'c', 'h', 'a', 'n',
    # 'e', 'l', which is only accidentally correct.
    channel_id = link["href"].lchop("/channel/")

    begin
      channel = get_channel(channel_id, client, db, false, false)
      channels << channel.id
    rescue ex
      next
    end
  end

  email = feed.xpath_node(%q(//a[@class="yt-masthead-picker-header yt-masthead-picker-active-account"]))
  if email
    email = email.content.strip
  else
    email = ""
  end

  token = Base64.urlsafe_encode(Random::Secure.random_bytes(32))

  user = User.new(sid, Time.now, [] of String, channels, email, DEFAULT_USER_PREFERENCES, nil, token, [] of String)
  return user
end
# Fetches everything needed to build a `Video` for `id`.
#
# Two requests run concurrently: the watch page (for likes, dislikes,
# description and the itemprop metadata) and `get_video_info` (for title,
# view count, author and channel id). If the first `get_video_info` response
# carries a "reason", the request is retried without `el=detailpage`.
#
# Raises the "reason" text when the video is still reported unavailable, and
# re-raises any error that occurred in either request. Previously such an
# error left this method waiting forever on a channel nobody would send to.
def fetch_video(id)
  html_channel = Channel(XML::Node | Exception).new
  info_channel = Channel(HTTP::Params | Exception).new

  spawn do
    begin
      client = make_client(YT_URL)
      response = client.get("/watch?v=#{id}&bpctr=#{Time.new.epoch + 2000}&disable_polymer=1")

      html_channel.send(XML.parse_html(response.body))
    rescue ex
      html_channel.send(ex)
    end
  end

  spawn do
    begin
      client = make_client(YT_URL)
      response = client.get("/get_video_info?video_id=#{id}&el=detailpage&ps=default&eurl=&gl=US&hl=en&disable_polymer=1")
      params = HTTP::Params.parse(response.body)

      if params["reason"]?
        response = client.get("/get_video_info?video_id=#{id}&ps=default&eurl=&gl=US&hl=en&disable_polymer=1")
        params = HTTP::Params.parse(response.body)
      end

      info_channel.send(params)
    rescue ex
      info_channel.send(ex)
    end
  end

  # Receive from both fibers before raising so neither is left blocked on send.
  html = html_channel.receive
  info = info_channel.receive

  raise html if html.is_a?(Exception)
  raise info if info.is_a?(Exception)

  if info["reason"]?
    raise info["reason"]
  end

  title = info["title"]
  views = info["view_count"].to_i64
  author = info["author"]
  ucid = info["ucid"]

  likes = html.xpath_node(%q(//button[@title="I like this"]/span))
  likes = likes.try &.content.delete(",").try &.to_i
  likes ||= 0

  dislikes = html.xpath_node(%q(//button[@title="I dislike this"]/span))
  dislikes = dislikes.try &.content.delete(",").try &.to_i
  dislikes ||= 0

  description = html.xpath_node(%q(//p[@id="eow-description"]))
  description = description ? description.to_xml : ""

  wilson_score = ci_lower_bound(likes, likes + dislikes)

  published = html.xpath_node(%q(//meta[@itemprop="datePublished"])).not_nil!["content"]
  published = Time.parse(published, "%Y-%m-%d", Time::Location.local)

  allowed_regions = html.xpath_node(%q(//meta[@itemprop="regionsAllowed"])).not_nil!["content"].split(",")
  is_family_friendly = html.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).not_nil!["content"] == "True"
  genre = html.xpath_node(%q(//meta[@itemprop="genre"])).not_nil!["content"]

  # Language is left unset (nil) here.
  video = Video.new(id, info, Time.now, title, views, likes, dislikes, wilson_score, published, description,
    nil, author, ucid, allowed_regions, is_family_friendly, genre)

  return video
end
=> 3072, "acodec" => "aac", "abr" => 192, "vcodec" => "h264"}, + "43" => {"ext" => "webm", "width" => 640, "height" => 360, "acodec" => "vorbis", "abr" => 128, "vcodec" => "vp8"}, + "44" => {"ext" => "webm", "width" => 854, "height" => 480, "acodec" => "vorbis", "abr" => 128, "vcodec" => "vp8"}, + "45" => {"ext" => "webm", "width" => 1280, "height" => 720, "acodec" => "vorbis", "abr" => 192, "vcodec" => "vp8"}, + "46" => {"ext" => "webm", "width" => 1920, "height" => 1080, "acodec" => "vorbis", "abr" => 192, "vcodec" => "vp8"}, + "59" => {"ext" => "mp4", "width" => 854, "height" => 480, "acodec" => "aac", "abr" => 128, "vcodec" => "h264"}, + "78" => {"ext" => "mp4", "width" => 854, "height" => 480, "acodec" => "aac", "abr" => 128, "vcodec" => "h264"}, + + # 3D videos + "82" => {"ext" => "mp4", "height" => 360, "format" => "3D", "acodec" => "aac", "abr" => 128, "vcodec" => "h264"}, + "83" => {"ext" => "mp4", "height" => 480, "format" => "3D", "acodec" => "aac", "abr" => 128, "vcodec" => "h264"}, + "84" => {"ext" => "mp4", "height" => 720, "format" => "3D", "acodec" => "aac", "abr" => 192, "vcodec" => "h264"}, + "85" => {"ext" => "mp4", "height" => 1080, "format" => "3D", "acodec" => "aac", "abr" => 192, "vcodec" => "h264"}, + "100" => {"ext" => "webm", "height" => 360, "format" => "3D", "acodec" => "vorbis", "abr" => 128, "vcodec" => "vp8"}, + "101" => {"ext" => "webm", "height" => 480, "format" => "3D", "acodec" => "vorbis", "abr" => 192, "vcodec" => "vp8"}, + "102" => {"ext" => "webm", "height" => 720, "format" => "3D", "acodec" => "vorbis", "abr" => 192, "vcodec" => "vp8"}, + + # Apple HTTP Live Streaming + "91" => {"ext" => "mp4", "height" => 144, "format" => "HLS", "acodec" => "aac", "abr" => 48, "vcodec" => "h264"}, + "92" => {"ext" => "mp4", "height" => 240, "format" => "HLS", "acodec" => "aac", "abr" => 48, "vcodec" => "h264"}, + "93" => {"ext" => "mp4", "height" => 360, "format" => "HLS", "acodec" => "aac", "abr" => 128, "vcodec" => "h264"}, + "94" => 
{"ext" => "mp4", "height" => 480, "format" => "HLS", "acodec" => "aac", "abr" => 128, "vcodec" => "h264"}, + "95" => {"ext" => "mp4", "height" => 720, "format" => "HLS", "acodec" => "aac", "abr" => 256, "vcodec" => "h264"}, + "96" => {"ext" => "mp4", "height" => 1080, "format" => "HLS", "acodec" => "aac", "abr" => 256, "vcodec" => "h264"}, + "132" => {"ext" => "mp4", "height" => 240, "format" => "HLS", "acodec" => "aac", "abr" => 48, "vcodec" => "h264"}, + "151" => {"ext" => "mp4", "height" => 72, "format" => "HLS", "acodec" => "aac", "abr" => 24, "vcodec" => "h264"}, + + # DASH mp4 video + "133" => {"ext" => "mp4", "height" => 240, "format" => "DASH video", "vcodec" => "h264"}, + "134" => {"ext" => "mp4", "height" => 360, "format" => "DASH video", "vcodec" => "h264"}, + "135" => {"ext" => "mp4", "height" => 480, "format" => "DASH video", "vcodec" => "h264"}, + "136" => {"ext" => "mp4", "height" => 720, "format" => "DASH video", "vcodec" => "h264"}, + "137" => {"ext" => "mp4", "height" => 1080, "format" => "DASH video", "vcodec" => "h264"}, + "138" => {"ext" => "mp4", "format" => "DASH video", "vcodec" => "h264"}, # Height can vary (https=>//github.com/rg3/youtube-dl/issues/4559) + "160" => {"ext" => "mp4", "height" => 144, "format" => "DASH video", "vcodec" => "h264"}, + "212" => {"ext" => "mp4", "height" => 480, "format" => "DASH video", "vcodec" => "h264"}, + "264" => {"ext" => "mp4", "height" => 1440, "format" => "DASH video", "vcodec" => "h264"}, + "298" => {"ext" => "mp4", "height" => 720, "format" => "DASH video", "vcodec" => "h264", "fps" => 60}, + "299" => {"ext" => "mp4", "height" => 1080, "format" => "DASH video", "vcodec" => "h264", "fps" => 60}, + "266" => {"ext" => "mp4", "height" => 2160, "format" => "DASH video", "vcodec" => "h264"}, + + # Dash mp4 audio + "139" => {"ext" => "m4a", "format" => "DASH audio", "acodec" => "aac", "abr" => 48, "container" => "m4a_dash"}, + "140" => {"ext" => "m4a", "format" => "DASH audio", "acodec" => "aac", "abr" => 
128, "container" => "m4a_dash"}, + "141" => {"ext" => "m4a", "format" => "DASH audio", "acodec" => "aac", "abr" => 256, "container" => "m4a_dash"}, + "256" => {"ext" => "m4a", "format" => "DASH audio", "acodec" => "aac", "container" => "m4a_dash"}, + "258" => {"ext" => "m4a", "format" => "DASH audio", "acodec" => "aac", "container" => "m4a_dash"}, + "325" => {"ext" => "m4a", "format" => "DASH audio", "acodec" => "dtse", "container" => "m4a_dash"}, + "328" => {"ext" => "m4a", "format" => "DASH audio", "acodec" => "ec-3", "container" => "m4a_dash"}, + + # Dash webm + "167" => {"ext" => "webm", "height" => 360, "width" => 640, "format" => "DASH video", "container" => "webm", "vcodec" => "vp8"}, + "168" => {"ext" => "webm", "height" => 480, "width" => 854, "format" => "DASH video", "container" => "webm", "vcodec" => "vp8"}, + "169" => {"ext" => "webm", "height" => 720, "width" => 1280, "format" => "DASH video", "container" => "webm", "vcodec" => "vp8"}, + "170" => {"ext" => "webm", "height" => 1080, "width" => 1920, "format" => "DASH video", "container" => "webm", "vcodec" => "vp8"}, + "218" => {"ext" => "webm", "height" => 480, "width" => 854, "format" => "DASH video", "container" => "webm", "vcodec" => "vp8"}, + "219" => {"ext" => "webm", "height" => 480, "width" => 854, "format" => "DASH video", "container" => "webm", "vcodec" => "vp8"}, + "278" => {"ext" => "webm", "height" => 144, "format" => "DASH video", "container" => "webm", "vcodec" => "vp9"}, + "242" => {"ext" => "webm", "height" => 240, "format" => "DASH video", "vcodec" => "vp9"}, + "243" => {"ext" => "webm", "height" => 360, "format" => "DASH video", "vcodec" => "vp9"}, + "244" => {"ext" => "webm", "height" => 480, "format" => "DASH video", "vcodec" => "vp9"}, + "245" => {"ext" => "webm", "height" => 480, "format" => "DASH video", "vcodec" => "vp9"}, + "246" => {"ext" => "webm", "height" => 480, "format" => "DASH video", "vcodec" => "vp9"}, + "247" => {"ext" => "webm", "height" => 720, "format" => "DASH 
video", "vcodec" => "vp9"}, + "248" => {"ext" => "webm", "height" => 1080, "format" => "DASH video", "vcodec" => "vp9"}, + "271" => {"ext" => "webm", "height" => 1440, "format" => "DASH video", "vcodec" => "vp9"}, + # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug) + "272" => {"ext" => "webm", "height" => 2160, "format" => "DASH video", "vcodec" => "vp9"}, + "302" => {"ext" => "webm", "height" => 720, "format" => "DASH video", "vcodec" => "vp9", "fps" => 60}, + "303" => {"ext" => "webm", "height" => 1080, "format" => "DASH video", "vcodec" => "vp9", "fps" => 60}, + "308" => {"ext" => "webm", "height" => 1440, "format" => "DASH video", "vcodec" => "vp9", "fps" => 60}, + "313" => {"ext" => "webm", "height" => 2160, "format" => "DASH video", "vcodec" => "vp9"}, + "315" => {"ext" => "webm", "height" => 2160, "format" => "DASH video", "vcodec" => "vp9", "fps" => 60}, + + # Dash webm audio + "171" => {"ext" => "webm", "acodec" => "vorbis", "format" => "DASH audio", "abr" => 128}, + "172" => {"ext" => "webm", "acodec" => "vorbis", "format" => "DASH audio", "abr" => 256}, + + # Dash webm audio with opus inside + "249" => {"ext" => "webm", "format" => "DASH audio", "acodec" => "opus", "abr" => 50}, + "250" => {"ext" => "webm", "format" => "DASH audio", "acodec" => "opus", "abr" => 70}, + "251" => {"ext" => "webm", "format" => "DASH audio", "acodec" => "opus", "abr" => 160}, + } + + return formats[itag] +end