mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-21 20:46:36 -05:00
[twitter] Add support for user_id-less URLs (closes #14270)
This commit is contained in:
parent
4bb58fa118
commit
5c1452e8f1
1 changed file with 25 additions and 5 deletions
|
@@ -242,8 +242,9 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
class TwitterIE(InfoExtractor):
|
class TwitterIE(InfoExtractor):
|
||||||
IE_NAME = 'twitter'
|
IE_NAME = 'twitter'
|
||||||
_VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?:i/web|(?P<user_id>[^/]+))/status/(?P<id>\d+)'
|
||||||
_TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
|
_TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
|
||||||
|
_TEMPLATE_STATUSES_URL = 'https://twitter.com/statuses/%s'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
|
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
|
||||||
|
@@ -322,9 +323,9 @@ class TwitterIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'MIOxnrUteUd',
|
'id': 'MIOxnrUteUd',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'FilmDrunk - Vine of the day',
|
'title': 'Vince Mancini - Vine of the day',
|
||||||
'description': 'FilmDrunk on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"',
|
'description': 'Vince Mancini on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"',
|
||||||
'uploader': 'FilmDrunk',
|
'uploader': 'Vince Mancini',
|
||||||
'uploader_id': 'Filmdrunk',
|
'uploader_id': 'Filmdrunk',
|
||||||
'timestamp': 1402826626,
|
'timestamp': 1402826626,
|
||||||
'upload_date': '20140615',
|
'upload_date': '20140615',
|
||||||
|
@@ -372,6 +373,21 @@ class TwitterIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'format': 'best[format_id^=http-]',
|
'format': 'best[format_id^=http-]',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://twitter.com/i/web/status/910031516746514432',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '910031516746514432',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'description': 'Préfet de Guadeloupe on Twitter: "[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo"',
|
||||||
|
'uploader': 'Préfet de Guadeloupe',
|
||||||
|
'uploader_id': 'Prefet971',
|
||||||
|
'duration': 47.48,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # requires ffmpeg
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -380,11 +396,15 @@ def _real_extract(self, url):
|
||||||
twid = mobj.group('id')
|
twid = mobj.group('id')
|
||||||
|
|
||||||
webpage, urlh = self._download_webpage_handle(
|
webpage, urlh = self._download_webpage_handle(
|
||||||
self._TEMPLATE_URL % (user_id, twid), twid)
|
self._TEMPLATE_STATUSES_URL % twid, twid)
|
||||||
|
|
||||||
if 'twitter.com/account/suspended' in urlh.geturl():
|
if 'twitter.com/account/suspended' in urlh.geturl():
|
||||||
raise ExtractorError('Account suspended by Twitter.', expected=True)
|
raise ExtractorError('Account suspended by Twitter.', expected=True)
|
||||||
|
|
||||||
|
if user_id is None:
|
||||||
|
mobj = re.match(self._VALID_URL, urlh.geturl())
|
||||||
|
user_id = mobj.group('user_id')
|
||||||
|
|
||||||
username = remove_end(self._og_search_title(webpage), ' on Twitter')
|
username = remove_end(self._og_search_title(webpage), ' on Twitter')
|
||||||
|
|
||||||
title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”')
|
title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”')
|
||||||
|
|
Loading…
Reference in a new issue