From 4a6e920d0e4e2dd4b67006e203e5776893c3639b Mon Sep 17 00:00:00 2001 From: Ben Heller <ben@bheller.me> Date: Wed, 2 Sep 2020 13:28:57 -0700 Subject: [PATCH] Use new youtube API to fetch channel videos (#1355) * Use new API to fetch videos from channels This mirrors the process used by subscriptions.gir.st. The old API is tried first, and if it fails then the new one is used. * Use the new API whenever getting videos from a channel I created the get_channel_videos_response function because now instead of just getting a single url, there are extra steps involved in getting the API response for channel videos, and these steps don't need to be repeated throughout the code. The only remaining exception is the bypass_captcha function, which still only makes a request to the old API. I don't know whether this code needs to be updated to use the new API for captcha bypassing to work correctly. * Correctly determine video length with new api * Remove unnecessary line --- src/invidious/channels.cr | 64 +++++-- src/invidious/helpers/helpers.cr | 296 +++++++++++++++++-------------- 2 files changed, 204 insertions(+), 156 deletions(-) diff --git a/src/invidious/channels.cr b/src/invidious/channels.cr index da062755..007aa06c 100644 --- a/src/invidious/channels.cr +++ b/src/invidious/channels.cr @@ -213,8 +213,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) page = 1 - url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated) - response = YT_POOL.client &.get(url) + response = get_channel_videos_response(ucid, page, auto_generated: auto_generated) videos = [] of SearchVideo begin @@ -291,8 +290,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) ids = [] of String loop do - url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated) - response = YT_POOL.client &.get(url) + response = get_channel_videos_response(ucid, page, auto_generated: auto_generated) initial_data = JSON.parse(response.body).as_a.find &.["response"]? raise "Could not extract JSON" if !initial_data videos = extract_videos(initial_data.as_h, author, ucid) @@ -396,7 +394,7 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by) return items, continuation end -def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest") +def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false) object = { "80226972:embedded" => { "2:string" => ucid, @@ -411,18 +409,33 @@ def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = " }, } - if auto_generated - seed = Time.unix(1525757349) - until seed >= Time.utc - seed += 1.month - end - timestamp = seed - (page - 1).months + if !v2 + if auto_generated + seed = Time.unix(1525757349) + until seed >= Time.utc + seed += 1.month + end + timestamp = seed - (page - 1).months - object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x36_i64 - object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{timestamp.to_unix}" + object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x36_i64 + object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{timestamp.to_unix}" + else + object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64 + object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}" + end else object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64 - object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}" + + object["80226972:embedded"]["3:base64"].as(Hash)["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({ + "1:embedded" => { + "1:varint" => 6307666885028338688_i64, + "2:embedded" => { + "1:string" => Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({ + "1:varint" => 30_i64 * (page - 1), + }))), + }, + }, + }))) end case sort_by @@ -901,12 +914,28 @@ def get_about_info(ucid, locale) }) end +def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest") + url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: false) + response = YT_POOL.client &.get(url) + initial_data = JSON.parse(response.body).as_a.find &.["response"]? + return response if !initial_data + needs_v2 = initial_data + .try &.["response"]?.try &.["alerts"]? + .try &.as_a.any? { |alert| + alert.try &.["alertRenderer"]?.try &.["type"]?.try { |t| t == "ERROR" } + } + if needs_v2 + url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true) + response = YT_POOL.client &.get(url) + end + response +end + def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest") videos = [] of SearchVideo 2.times do |i| - url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by) - response = YT_POOL.client &.get(url) + response = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by) initial_data = JSON.parse(response.body).as_a.find &.["response"]? break if !initial_data videos.concat extract_videos(initial_data.as_h, author, ucid) @@ -916,8 +945,7 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest") end def get_latest_videos(ucid) - url = produce_channel_videos_url(ucid, 0) - response = YT_POOL.client &.get(url) + response = get_channel_videos_response(ucid, 1) initial_data = JSON.parse(response.body).as_a.find &.["response"]? return [] of SearchVideo if !initial_data author = initial_data["response"]?.try &.["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr index 56f856c0..6571f818 100644 --- a/src/invidious/helpers/helpers.cr +++ b/src/invidious/helpers/helpers.cr @@ -164,148 +164,168 @@ def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : Str extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo)) end +def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil) + if i = (item["videoRenderer"]? || item["gridVideoRenderer"]?) + video_id = i["videoId"].as_s + title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || "" + + author_info = i["ownerText"]?.try &.["runs"].as_a[0]? + author = author_info.try &.["text"].as_s || author_fallback || "" + author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || "" + + published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local + view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64 + description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || "" + length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || + i["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]? + .try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0 + + live_now = false + paid = false + premium = false + + premiere_timestamp = i["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) } + + i["badges"]?.try &.as_a.each do |badge| + b = badge["metadataBadgeRenderer"] + case b["label"].as_s + when "LIVE NOW" + live_now = true + when "New", "4K", "CC" + # TODO + when "Premium" + paid = true + + # TODO: Potentially available as i["topStandaloneBadge"]["metadataBadgeRenderer"] + premium = true + else nil # Ignore + end + end + + SearchVideo.new({ + title: title, + id: video_id, + author: author, + ucid: author_id, + published: published, + views: view_count, + description_html: description_html, + length_seconds: length_seconds, + live_now: live_now, + paid: paid, + premium: premium, + premiere_timestamp: premiere_timestamp, + }) + elsif i = item["channelRenderer"]? + author = i["title"]["simpleText"]?.try &.as_s || author_fallback || "" + author_id = i["channelId"]?.try &.as_s || author_id_fallback || "" + + author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try { |u| "https:#{u["url"]}" } || "" + subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0 + + auto_generated = false + auto_generated = true if !i["videoCountText"]? + video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0 + description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || "" + + SearchChannel.new({ + author: author, + ucid: author_id, + author_thumbnail: author_thumbnail, + subscriber_count: subscriber_count, + video_count: video_count, + description_html: description_html, + auto_generated: auto_generated, + }) + elsif i = item["gridPlaylistRenderer"]? + title = i["title"]["runs"].as_a[0]?.try &.["text"].as_s || "" + plid = i["playlistId"]?.try &.as_s || "" + + video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0 + playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || "" + + SearchPlaylist.new({ + title: title, + id: plid, + author: author_fallback || "", + ucid: author_id_fallback || "", + video_count: video_count, + videos: [] of SearchPlaylistVideo, + thumbnail: playlist_thumbnail, + }) + elsif i = item["playlistRenderer"]? + title = i["title"]["simpleText"]?.try &.as_s || "" + plid = i["playlistId"]?.try &.as_s || "" + + video_count = i["videoCount"]?.try &.as_s.to_i || 0 + playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || "" + + author_info = i["shortBylineText"]?.try &.["runs"].as_a[0]? + author = author_info.try &.["text"].as_s || author_fallback || "" + author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || "" + + videos = i["videos"]?.try &.as_a.map do |v| + v = v["childVideoRenderer"] + v_title = v["title"]["simpleText"]?.try &.as_s || "" + v_id = v["videoId"]?.try &.as_s || "" + v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0 + SearchPlaylistVideo.new({ + title: v_title, + id: v_id, + length_seconds: v_length_seconds, + }) + end || [] of SearchPlaylistVideo + + # TODO: i["publishedTimeText"]? + + SearchPlaylist.new({ + title: title, + id: plid, + author: author, + ucid: author_id, + video_count: video_count, + videos: videos, + thumbnail: playlist_thumbnail, + }) + elsif i = item["radioRenderer"]? # Mix + # TODO + elsif i = item["showRenderer"]? # Show + # TODO + elsif i = item["shelfRenderer"]? + elsif i = item["horizontalCardListRenderer"]? + elsif i = item["searchPyvRenderer"]? # Ad + end +end + def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) items = [] of SearchItem - initial_data.try { |t| t["contents"]? || t["response"]? } - .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] || - t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] || - t["continuationContents"]? } - .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? } - .try &.["contents"].as_a - .each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a - .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a || - t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t } - .each { |item| - if i = item["videoRenderer"]? - video_id = i["videoId"].as_s - title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || "" + channel_v2_response = initial_data + .try &.["response"]? + .try &.["continuationContents"]? + .try &.["gridContinuation"]? + .try &.["items"]? - author_info = i["ownerText"]?.try &.["runs"].as_a[0]? - author = author_info.try &.["text"].as_s || author_fallback || "" - author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || "" - - published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local - view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64 - description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || "" - length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0 - - live_now = false - paid = false - premium = false - - premiere_timestamp = i["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) } - - i["badges"]?.try &.as_a.each do |badge| - b = badge["metadataBadgeRenderer"] - case b["label"].as_s - when "LIVE NOW" - live_now = true - when "New", "4K", "CC" - # TODO - when "Premium" - paid = true - - # TODO: Potentially available as i["topStandaloneBadge"]["metadataBadgeRenderer"] - premium = true - else nil # Ignore - end - end - - items << SearchVideo.new({ - title: title, - id: video_id, - author: author, - ucid: author_id, - published: published, - views: view_count, - description_html: description_html, - length_seconds: length_seconds, - live_now: live_now, - paid: paid, - premium: premium, - premiere_timestamp: premiere_timestamp, - }) - elsif i = item["channelRenderer"]? - author = i["title"]["simpleText"]?.try &.as_s || author_fallback || "" - author_id = i["channelId"]?.try &.as_s || author_id_fallback || "" - - author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try { |u| "https:#{u["url"]}" } || "" - subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0 - - auto_generated = false - auto_generated = true if !i["videoCountText"]? - video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0 - description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || "" - - items << SearchChannel.new({ - author: author, - ucid: author_id, - author_thumbnail: author_thumbnail, - subscriber_count: subscriber_count, - video_count: video_count, - description_html: description_html, - auto_generated: auto_generated, - }) - elsif i = item["gridPlaylistRenderer"]? - title = i["title"]["runs"].as_a[0]?.try &.["text"].as_s || "" - plid = i["playlistId"]?.try &.as_s || "" - - video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0 - playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || "" - - items << SearchPlaylist.new({ - title: title, - id: plid, - author: author_fallback || "", - ucid: author_id_fallback || "", - video_count: video_count, - videos: [] of SearchPlaylistVideo, - thumbnail: playlist_thumbnail, - }) - elsif i = item["playlistRenderer"]? - title = i["title"]["simpleText"]?.try &.as_s || "" - plid = i["playlistId"]?.try &.as_s || "" - - video_count = i["videoCount"]?.try &.as_s.to_i || 0 - playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || "" - - author_info = i["shortBylineText"]?.try &.["runs"].as_a[0]? - author = author_info.try &.["text"].as_s || author_fallback || "" - author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || "" - - videos = i["videos"]?.try &.as_a.map do |v| - v = v["childVideoRenderer"] - v_title = v["title"]["simpleText"]?.try &.as_s || "" - v_id = v["videoId"]?.try &.as_s || "" - v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0 - SearchPlaylistVideo.new({ - title: v_title, - id: v_id, - length_seconds: v_length_seconds, - }) - end || [] of SearchPlaylistVideo - - # TODO: i["publishedTimeText"]? - - items << SearchPlaylist.new({ - title: title, - id: plid, - author: author, - ucid: author_id, - video_count: video_count, - videos: videos, - thumbnail: playlist_thumbnail, - }) - elsif i = item["radioRenderer"]? # Mix - # TODO - elsif i = item["showRenderer"]? # Show - # TODO - elsif i = item["shelfRenderer"]? - elsif i = item["horizontalCardListRenderer"]? - elsif i = item["searchPyvRenderer"]? # Ad - end - } } + if channel_v2_response + channel_v2_response.try &.as_a.each { |item| + extract_item(item, author_fallback, author_id_fallback) + .try { |t| items << t } + } + else + initial_data.try { |t| t["contents"]? || t["response"]? } + .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] || + t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] || + t["continuationContents"]? } + .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? } + .try &.["contents"].as_a + .each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a + .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a || + t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t } + .each { |item| + extract_item(item, author_fallback, author_id_fallback) + .try { |t| items << t } + } } + end items end