[ie/tiktok] Fix web formats extraction (#11074)

Closes #11034
Authored by: bashonly
This commit is contained in:
bashonly 2024-09-24 17:10:42 -05:00 committed by GitHub
parent 4a9bc8c363
commit 3ad0b7f422
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -542,16 +542,12 @@ def _extract_web_formats(self, aweme_detail):
**COMMON_FORMAT_INFO,
'format_id': 'download',
'url': self._proto_relative_url(download_url),
'format_note': 'watermarked',
'preference': -2,
})
self._remove_duplicate_formats(formats)
for f in traverse_obj(formats, lambda _, v: 'unwatermarked' not in v['url']):
f.update({
'format_note': join_nonempty(f.get('format_note'), 'watermarked', delim=', '),
'preference': f.get('preference') or -2,
})
# Is it a slideshow with only audio for download?
if not formats and traverse_obj(aweme_detail, ('music', 'playUrl', {url_or_none})):
audio_url = aweme_detail['music']['playUrl']
@@ -565,7 +561,8 @@ def _extract_web_formats(self, aweme_detail):
'vcodec': 'none',
})
return formats
# Filter out broken formats, see https://github.com/yt-dlp/yt-dlp/issues/11034
return [f for f in formats if urllib.parse.urlparse(f['url']).hostname != 'www.tiktok.com']
def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id, extract_flat=False):
author_info = traverse_obj(aweme_detail, (('authorInfo', 'author', None), {