mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-02 14:10:05 -05:00
[adultswim] Improve video_info extraction (Fixes #5152)
Look for video_info inside `slugged_video`, if slug is not found among collections. Also, simplify a bit.
This commit is contained in:
parent
43d6280d0a
commit
cc08b11d16
1 changed files with 32 additions and 12 deletions
|
@ -2,13 +2,12 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
xpath_text,
|
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -60,6 +59,24 @@ class AdultSwimIE(InfoExtractor):
|
||||||
'title': 'American Dad - Putting Francine Out of Business',
|
'title': 'American Dad - Putting Francine Out of Business',
|
||||||
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
|
||||||
|
'playlist': [
|
||||||
|
{
|
||||||
|
'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
|
||||||
|
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
|
||||||
|
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
|
||||||
|
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -80,6 +97,7 @@ def find_collection_containing_video(collections, slug):
|
||||||
for video in collection.get('videos'):
|
for video in collection.get('videos'):
|
||||||
if video.get('slug') == slug:
|
if video.get('slug') == slug:
|
||||||
return collection, video
|
return collection, video
|
||||||
|
return None, None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@ -90,28 +108,30 @@ def _real_extract(self, url):
|
||||||
webpage = self._download_webpage(url, episode_path)
|
webpage = self._download_webpage(url, episode_path)
|
||||||
|
|
||||||
# Extract the value of `bootstrappedData` from the Javascript in the page.
|
# Extract the value of `bootstrappedData` from the Javascript in the page.
|
||||||
bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
|
bootstrapped_data = self._parse_json(self._search_regex(
|
||||||
|
r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
|
||||||
try:
|
|
||||||
bootstrappedData = json.loads(bootstrappedDataJS)
|
|
||||||
except ValueError as ve:
|
|
||||||
errmsg = '%s: Failed to parse JSON ' % episode_path
|
|
||||||
raise ExtractorError(errmsg, cause=ve)
|
|
||||||
|
|
||||||
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
|
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
|
||||||
# NOTE: We are only downloading one video (the current one) not the playlist
|
# NOTE: We are only downloading one video (the current one) not the playlist
|
||||||
if is_playlist:
|
if is_playlist:
|
||||||
collections = bootstrappedData['playlists']['collections']
|
collections = bootstrapped_data['playlists']['collections']
|
||||||
collection = self.find_collection_by_linkURL(collections, show_path)
|
collection = self.find_collection_by_linkURL(collections, show_path)
|
||||||
video_info = self.find_video_info(collection, episode_path)
|
video_info = self.find_video_info(collection, episode_path)
|
||||||
|
|
||||||
show_title = video_info['showTitle']
|
show_title = video_info['showTitle']
|
||||||
segment_ids = [video_info['videoPlaybackID']]
|
segment_ids = [video_info['videoPlaybackID']]
|
||||||
else:
|
else:
|
||||||
collections = bootstrappedData['show']['collections']
|
collections = bootstrapped_data['show']['collections']
|
||||||
collection, video_info = self.find_collection_containing_video(collections, episode_path)
|
collection, video_info = self.find_collection_containing_video(collections, episode_path)
|
||||||
|
|
||||||
show = bootstrappedData['show']
|
# Video wasn't found in the collections, let's try `slugged_video`.
|
||||||
|
if video_info is None:
|
||||||
|
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
|
||||||
|
video_info = bootstrapped_data['slugged_video']
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Unable to find video info')
|
||||||
|
|
||||||
|
show = bootstrapped_data['show']
|
||||||
show_title = show['title']
|
show_title = show['title']
|
||||||
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
|
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue