mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-21 20:46:36 -05:00
parent
226c0f3a54
commit
2c98d99818
3 changed files with 78 additions and 1 deletions
|
@ -1345,6 +1345,7 @@
|
||||||
PluralsightIE,
|
PluralsightIE,
|
||||||
PluralsightCourseIE,
|
PluralsightCourseIE,
|
||||||
)
|
)
|
||||||
|
from .podbayfm import PodbayFMIE, PodbayFMChannelIE
|
||||||
from .podchaser import PodchaserIE
|
from .podchaser import PodchaserIE
|
||||||
from .podomatic import PodomaticIE
|
from .podomatic import PodomaticIE
|
||||||
from .pokemon import (
|
from .pokemon import (
|
||||||
|
|
75
yt_dlp/extractor/podbayfm.py
Normal file
75
yt_dlp/extractor/podbayfm.py
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import OnDemandPagedList, int_or_none, jwt_decode_hs256, try_call
|
||||||
|
|
||||||
|
|
||||||
|
def result_from_props(props, episode_id=None):
    """Build an info dict from one Podbay episode record.

    `props` is a mapping describing the episode; `episode_id` is only used
    as the 'id' when the record has no truthy 'podcast_id'.
    'mediaURL' is the single key read with [] and therefore raises
    KeyError when it is missing; every other field is optional.
    """
    def _thumbnail_url():
        # The 'image' field is decoded as a JWT; its payload's 'url' is used
        return jwt_decode_hs256(props['image'])['url']

    info = {'id': props.get('podcast_id') or episode_id}
    info['title'] = props.get('title')
    info['url'] = props['mediaURL']
    info['ext'] = 'mp3'
    # Wrapped in try_call so a missing/undecodable image is not fatal
    # (presumably yields None on failure - confirm against try_call)
    info['thumbnail'] = try_call(_thumbnail_url)
    info['timestamp'] = props.get('timestamp')
    info['duration'] = int_or_none(props.get('duration'))
    return info
|
||||||
|
|
||||||
|
|
||||||
|
class PodbayFMIE(InfoExtractor):
    # Single episode pages: https://podbay.fm/p/<podcast slug>/e/<episode id>
    _VALID_URL = r'https?://podbay\.fm/p/[^/]*/e/(?P<id>[^/]*)/?(?:[\?#].*)?$'
    _TESTS = [{
        'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400',
        'md5': '98b41285dcf7989d105a4ed0404054cf',
        'info_dict': {
            'id': '1647338400',
            'title': 'Part One: Kissinger',
            'ext': 'mp3',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1647338400,
            'duration': 5001,
            'upload_date': '20220315',
        },
    }]

    def _real_extract(self, url):
        # The id captured from the URL doubles as the fallback 'id' of the result
        video_id = self._match_id(url)
        page_html = self._download_webpage(url, video_id)
        nextjs_data = self._search_nextjs_data(page_html, video_id)
        # The episode record sits under props -> pageProps -> episode
        episode_props = nextjs_data['props']['pageProps']['episode']
        return result_from_props(episode_props, video_id)
|
||||||
|
|
||||||
|
|
||||||
|
class PodbayFMChannelIE(InfoExtractor):
    # Podcast (channel) pages: https://podbay.fm/p/<podcast slug>
    _VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/]*)/?(?:[\?#].*)?$'
    _TESTS = [{
        'url': 'https://podbay.fm/p/behind-the-bastards',
        'info_dict': {
            'id': 'behind-the-bastards',
            'title': 'Behind the Bastards',
        },
    }]
    # Page size handed to OnDemandPagedList
    _PAGE_SIZE = 10

    def _fetch_page(self, channel_id, pagenum):
        # Download one page of the podcast API and return its 'podcast' object
        return self._download_json(
            f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}',
            channel_id)['podcast']

    @staticmethod
    def _results_from_page(channel_id, page):
        # Turn the 'episodes' of one API page into PodbayFMIE-attributed entries
        entries = []
        for episode in page['episodes']:
            # somehow they use timestamps as the episode identifier
            episode_id = str(episode['timestamp'])
            entries.append({
                # Pass the timestamp id as fallback so an entry without a
                # 'podcast_id' gets the same id PodbayFMIE would assign
                # instead of None
                **result_from_props(episode, episode_id),
                'extractor': PodbayFMIE.IE_NAME,
                'extractor_key': PodbayFMIE.ie_key(),
                'webpage_url': f'https://podbay.fm/p/{channel_id}/e/{episode_id}',
            })
        return entries

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        # Page 0 is fetched eagerly because it also supplies the playlist
        # title; it is reused below so it is not downloaded a second time
        first_page = self._fetch_page(channel_id, 0)
        entries = OnDemandPagedList(
            lambda pagenum: self._results_from_page(
                channel_id, self._fetch_page(channel_id, pagenum) if pagenum else first_page),
            self._PAGE_SIZE)

        return self.playlist_result(entries, channel_id, first_page.get('title'))
|
|
@ -5499,7 +5499,8 @@ def jwt_encode_hs256(payload_data, key, headers={}):
|
||||||
# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
|
# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
|
||||||
def jwt_decode_hs256(jwt):
    """Return the JSON payload of a JWT without verifying its signature.

    `jwt` must consist of exactly three dot-separated segments
    (header, payload, signature); otherwise the unpacking below raises
    ValueError. Only the payload segment is decoded.
    """
    _header_b64, payload_b64, _signature_b64 = jwt.split('.')
    # add trailing ='s that may have been stripped, superfluous ='s are ignored
    payload_data = json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
    return payload_data
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue