From ec4a22687fa386d5e61c4975578f11306b647ccf Mon Sep 17 00:00:00 2001 From: Triplesalt <39653793+Triplesalt@users.noreply.github.com> Date: Fri, 23 Oct 2020 03:40:07 +0200 Subject: [PATCH] Update get_about_info for polymer (iv-org/invidious#1423) (#1429) Update get_about_info for polymer (iv-org/invidious#1423) --- src/invidious/channels.cr | 134 ++++++++++++++++++++------------ src/invidious/views/channel.ecr | 2 +- 2 files changed, 85 insertions(+), 51 deletions(-) diff --git a/src/invidious/channels.cr b/src/invidious/channels.cr index 18bdac09..dce9e6aa 100644 --- a/src/invidious/channels.cr +++ b/src/invidious/channels.cr @@ -775,10 +775,12 @@ def extract_channel_community_cursor(continuation) cursor end +INITDATA_PREQUERY = "window[\"ytInitialData\"] = {" + def get_about_info(ucid, locale) - about = YT_POOL.client &.get("/channel/#{ucid}/about?disable_polymer=1&gl=US&hl=en") + about = YT_POOL.client &.get("/channel/#{ucid}/about?gl=US&hl=en") if about.status_code != 200 - about = YT_POOL.client &.get("/user/#{ucid}/about?disable_polymer=1&gl=US&hl=en") + about = YT_POOL.client &.get("/user/#{ucid}/about?gl=US&hl=en") end if md = about.headers["location"]?.try &.match(/\/channel\/(?UC[a-zA-Z0-9_-]{22})/) @@ -790,6 +792,17 @@ def get_about_info(ucid, locale) raise error_message end + initdata_pre = about.body.index(INITDATA_PREQUERY) + initdata_post = initdata_pre.nil? ? nil : about.body.index("};", initdata_pre) + if initdata_post.nil? + about = XML.parse_html(about.body) + error_message = about.xpath_node(%q(//div[@class="yt-alert-content"])).try &.content.strip + error_message ||= translate(locale, "Could not get channel info.") + raise error_message + end + initdata_pre = initdata_pre.not_nil! + INITDATA_PREQUERY.size - 1 + + initdata = JSON.parse(about.body[initdata_pre, initdata_post - initdata_pre + 1]) about = XML.parse_html(about.body) if about.xpath_node(%q(//div[contains(@class, "channel-empty-message")])) @@ -797,73 +810,94 @@ def get_about_info(ucid, locale) raise error_message end - if about.xpath_node(%q(//span[contains(@class,"qualified-channel-title-text")]/a)).try &.content.empty? - error_message = about.xpath_node(%q(//div[@class="yt-alert-content"])).try &.content.strip - error_message ||= translate(locale, "Could not get channel info.") - raise error_message - end - - author = about.xpath_node(%q(//span[contains(@class,"qualified-channel-title-text")]/a)).not_nil!.content - author_url = about.xpath_node(%q(//span[contains(@class,"qualified-channel-title-text")]/a)).not_nil!["href"] - author_thumbnail = about.xpath_node(%q(//img[@class="channel-header-profile-image"])).not_nil!["src"] + author = about.xpath_node(%q(//meta[@name="title"])).not_nil!["content"] + author_url = about.xpath_node(%q(//link[@rel="canonical"])).not_nil!["href"] + author_thumbnail = about.xpath_node(%q(//link[@rel="image_src"])).not_nil!["href"] ucid = about.xpath_node(%q(//meta[@itemprop="channelId"])).not_nil!["content"] - banner = about.xpath_node(%q(//div[@id="gh-banner"]/style)).not_nil!.content - banner = "https:" + banner.match(/background-image: url\((?[^)]+)\)/).not_nil!["url"] + # Raises a KeyError on failure. + banners = initdata["header"]["c4TabbedHeaderRenderer"]?.try &.["banner"]?.try &.["thumbnails"]? + banner = banners.try &.[-1]?.try &.["url"].as_s? - if banner.includes? "channels/c4/default_banner" - banner = nil - end + # if banner.includes? "channels/c4/default_banner" + # banner = nil + # end - description_html = about.xpath_node(%q(//div[contains(@class,"about-description")])).try &.to_s || - %(
) + description = initdata["metadata"]["channelMetadataRenderer"]?.try &.["description"]?.try &.as_s? || "" + description_html = HTML.escape(description).gsub("\n", "
") paid = about.xpath_node(%q(//meta[@itemprop="paid"])).not_nil!["content"] == "True" is_family_friendly = about.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).not_nil!["content"] == "True" allowed_regions = about.xpath_node(%q(//meta[@itemprop="regionsAllowed"])).not_nil!["content"].split(",") - related_channels = about.xpath_nodes(%q(//div[contains(@class, "branded-page-related-channels")]/ul/li)) - related_channels = related_channels.map do |node| - related_id = node["data-external-id"]? - related_id ||= "" + related_channels = initdata["contents"]["twoColumnBrowseResultsRenderer"] + .["secondaryContents"]?.try &.["browseSecondaryContentsRenderer"]["contents"][0]? + .try &.["verticalChannelSectionRenderer"]?.try &.["items"]?.try &.as_a.map do |node| + renderer = node["miniChannelRenderer"]? + related_id = renderer.try &.["channelId"]?.try &.as_s? + related_id ||= "" - anchor = node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a)) - related_title = anchor.try &.["title"] - related_title ||= "" + related_title = renderer.try &.["title"]?.try &.["simpleText"]?.try &.as_s? + related_title ||= "" - related_author_url = anchor.try &.["href"] - related_author_url ||= "" + related_author_url = renderer.try &.["navigationEndpoint"]?.try &.["commandMetadata"]?.try &.["webCommandMetadata"]? + .try &.["url"]?.try &.as_s? + related_author_url ||= "" - related_author_thumbnail = node.xpath_node(%q(.//img)).try &.["data-thumb"] - related_author_thumbnail ||= "" + related_author_thumbnails = renderer.try &.["thumbnail"]?.try &.["thumbnails"]?.try &.as_a? + related_author_thumbnails ||= [] of JSON::Any - AboutRelatedChannel.new({ - ucid: related_id, - author: related_title, - author_url: related_author_url, - author_thumbnail: related_author_thumbnail, - }) - end + related_author_thumbnail = "" + if related_author_thumbnails.size > 0 + related_author_thumbnail = related_author_thumbnails[-1]["url"]?.try &.as_s? + related_author_thumbnail ||= "" + end - joined = about.xpath_node(%q(//span[contains(., "Joined")])) - .try &.content.try { |text| Time.parse(text, "Joined %b %-d, %Y", Time::Location.local) } || Time.unix(0) + AboutRelatedChannel.new({ + ucid: related_id, + author: related_title, + author_url: related_author_url, + author_thumbnail: related_author_thumbnail, + }) + end + related_channels ||= [] of AboutRelatedChannel - total_views = about.xpath_node(%q(//span[contains(., "views")]/b)) - .try &.content.try &.gsub(/\D/, "").to_i64? || 0_i64 - - sub_count = about.xpath_node(%q(.//span[contains(@class, "subscriber-count")])) - .try &.["title"].try { |text| short_text_to_number(text) } || 0 - - # Auto-generated channels - # https://support.google.com/youtube/answer/2579942 + total_views = 0_i64 + joined = Time.unix(0) + tabs = [] of String auto_generated = false - if about.xpath_node(%q(//ul[@class="about-custom-links"]/li/a[@title="Auto-generated by YouTube"])) || - about.xpath_node(%q(//span[@class="qualified-channel-title-badge"]/span[@title="Auto-generated by YouTube"])) - auto_generated = true + + tabs_json = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]?.try &.as_a? + if !tabs_json.nil? + # Retrieve information from the tabs array. The index we are looking for varies between channels. + tabs_json.each do |node| + # Try to find the about section which is located in only one of the tabs. + channel_about_meta = node["tabRenderer"]?.try &.["content"]?.try &.["sectionListRenderer"]? + .try &.["contents"]?.try &.[0]?.try &.["itemSectionRenderer"]?.try &.["contents"]? + .try &.[0]?.try &.["channelAboutFullMetadataRenderer"]? + + if !channel_about_meta.nil? + total_views = channel_about_meta["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D/, "").to_i64? || 0_i64 + + # The joined text is split to several sub strings. The reduce joins those strings before parsing the date. + joined = channel_about_meta["joinedDateText"]?.try &.["runs"]?.try &.as_a.reduce("") { |acc, node| acc + node["text"].as_s } + .try { |text| Time.parse(text, "Joined %b %-d, %Y", Time::Location.local) } || Time.unix(0) + + # Auto-generated channels + # https://support.google.com/youtube/answer/2579942 + # For auto-generated channels, channel_about_meta only has ["description"]["simpleText"] and ["primaryLinks"][0]["title"]["simpleText"] + if (channel_about_meta["primaryLinks"]?.try &.size || 0) == 1 && (channel_about_meta["primaryLinks"][0]?) + (channel_about_meta["primaryLinks"][0]["title"]?.try &.["simpleText"]?.try &.as_s? || "") == "Auto-generated by YouTube" + auto_generated = true + end + end + end + tabs = tabs_json.reject { |node| node["tabRenderer"]?.nil? }.map { |node| node["tabRenderer"]["title"].as_s.downcase } end - tabs = about.xpath_nodes(%q(//ul[@id="channel-navigation-menu"]/li/a/span)).map { |node| node.content.downcase } + sub_count = initdata["header"]["c4TabbedHeaderRenderer"]?.try &.["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s? + .try { |text| short_text_to_number(text.split(" ")[0]) } || 0 AboutChannel.new({ ucid: ucid, diff --git a/src/invidious/views/channel.ecr b/src/invidious/views/channel.ecr index 4e9c7a63..caa0ad0e 100644 --- a/src/invidious/views/channel.ecr +++ b/src/invidious/views/channel.ecr @@ -28,7 +28,7 @@
-

<%= XML.parse_html(channel.description_html).xpath_node(%q(.//pre)).try &.content %>

+

<%= channel.description_html %>