mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-14 20:38:11 -05:00
[youtube] Fix extraction of search urls (closes ytdl-org/youtube-dl#25696)
This commit is contained in:
parent
2391941f28
commit
7a74feda78
1 changed files with 34 additions and 1 deletions
|
@ -3146,7 +3146,40 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
|
|||
|
||||
|
||||
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Base extractor for YouTube search result pages.

    Video ids and titles are read from the ``ytInitialData`` JSON object
    embedded in the page rather than from the HTML markup.
    """

    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
    # Captures the JSON object assigned to window["ytInitialData"].
    _PLAYLIST_DATA = r'window\[\"ytInitialData\"\]\W?=\W?({.*?});'

    def extract_videos_from_page_impl(self, page, ids_in_page, titles_in_page):
        """Collect video ids and titles from a search results page.

        ``ids_in_page`` and ``titles_in_page`` are parallel lists that are
        updated in place: unseen video ids are appended together with their
        title, and an already known id only has its title filled in when the
        stored title is empty.

        page -- HTML of the search results page
        ids_in_page -- list of video ids found so far (mutated)
        titles_in_page -- list of titles matching ids_in_page (mutated)
        """
        playlist_json = self._search_regex(self._PLAYLIST_DATA, page, 'ytInitialData')
        playlist_response = self._parse_json(playlist_json, None)

        # try_get yields None when any step of the lookup is missing (e.g. the
        # page layout changed or the first section is not an
        # itemSectionRenderer); fall back to an empty list so that case gives
        # "no results" instead of a TypeError when iterating.
        result_items = try_get(
            playlist_response,
            lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents']) or []

        # plobj either contains a 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'shelfRenderer' or 'searchPyvRenderer' (promoted video/ad)
        for plobj in result_items:
            video_id = try_get(plobj, lambda x: x['videoRenderer']['videoId'])
            video_title = try_get(plobj, lambda x: x['videoRenderer']['title']['runs'][0]['text'])

            if video_id is None or video_title is None:
                # we do not have a videoRenderer or it is empty
                continue

            video_title = video_title.strip()

            # Only the index lookup is expected to raise ValueError, so keep
            # the try body limited to it.
            try:
                idx = ids_in_page.index(video_id)
            except ValueError:
                # First occurrence of this video on the page.
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
            else:
                # Already known: only fill in a title that is still empty.
                if video_title and not titles_in_page[idx]:
                    titles_in_page[idx] = video_title

    def extract_videos_from_page(self, page):
        """Return the (video_id, title) pairs found on a search results page."""
        ids_in_page = []
        titles_in_page = []
        self.extract_videos_from_page_impl(page, ids_in_page, titles_in_page)
        return zip(ids_in_page, titles_in_page)
|
||||
|
||||
|
||||
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
|
||||
|
|
Loading…
Reference in a new issue