mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-14 20:38:11 -05:00
[ie/substack] Resolve podcast file extensions (#11275)
Closes #4601. Authored by: bashonly
This commit is contained in:
parent
7af1ddaaf2
commit
3148c1822f
1 changed file with 29 additions and 2 deletions
|
@ -2,7 +2,13 @@
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import js_to_json, str_or_none, traverse_obj
|
from ..networking import HEADRequest
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
js_to_json,
|
||||||
|
str_or_none,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class SubstackIE(InfoExtractor):
|
class SubstackIE(InfoExtractor):
|
||||||
|
@ -43,6 +49,19 @@ class SubstackIE(InfoExtractor):
|
||||||
'uploader': "Andrew Zimmern's Spilled Milk ",
|
'uploader': "Andrew Zimmern's Spilled Milk ",
|
||||||
'uploader_id': '577659',
|
'uploader_id': '577659',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# Podcast that needs its file extension resolved to mp3
|
||||||
|
'url': 'https://persuasion1.substack.com/p/summers',
|
||||||
|
'md5': '1456a755d46084744facdfac9edf900f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '141970405',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Larry Summers on What Went Wrong on Campus',
|
||||||
|
'description': 'Yascha Mounk and Larry Summers also discuss the promise and perils of artificial intelligence.',
|
||||||
|
'thumbnail': r're:https://substackcdn\.com/image/.+\.jpeg',
|
||||||
|
'uploader': 'Persuasion',
|
||||||
|
'uploader_id': '61579',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -89,7 +108,15 @@ def _real_extract(self, url):
|
||||||
post_type = webpage_info['post']['type']
|
post_type = webpage_info['post']['type']
|
||||||
formats, subtitles = [], {}
|
formats, subtitles = [], {}
|
||||||
if post_type == 'podcast':
|
if post_type == 'podcast':
|
||||||
formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {}
|
fmt = {'url': webpage_info['post']['podcast_url']}
|
||||||
|
if not determine_ext(fmt['url'], default_ext=None):
|
||||||
|
# The redirected format URL expires but the original URL doesn't,
|
||||||
|
# so we only want to extract the extension from this request
|
||||||
|
fmt['ext'] = determine_ext(self._request_webpage(
|
||||||
|
HEADRequest(fmt['url']), display_id,
|
||||||
|
'Resolving podcast file extension',
|
||||||
|
'Podcast URL is invalid').url)
|
||||||
|
formats.append(fmt)
|
||||||
elif post_type == 'video':
|
elif post_type == 'video':
|
||||||
formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url)
|
formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url)
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in a new issue