From c2502cfed91415c7ccfff925fd3404d230046484 Mon Sep 17 00:00:00 2001
From: mrscrapy <116454146+mrscrapy@users.noreply.github.com>
Date: Wed, 31 May 2023 04:41:21 +0100
Subject: [PATCH] [extractor/recurbate] Add extractor (#6297)

Authored by: mrscrapy
---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/recurbate.py   | 55 +++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 yt_dlp/extractor/recurbate.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 85c584f5e..abe3c2288 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1594,6 +1594,7 @@
     RCTIPlusTVIE,
 )
 from .rds import RDSIE
+from .recurbate import RecurbateIE
 from .redbee import ParliamentLiveUKIE, RTBFIE
 from .redbulltv import (
     RedBullTVIE,
diff --git a/yt_dlp/extractor/recurbate.py b/yt_dlp/extractor/recurbate.py
new file mode 100644
index 000000000..5534cf3c3
--- /dev/null
+++ b/yt_dlp/extractor/recurbate.py
@@ -0,0 +1,55 @@
+import urllib.error
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, merge_dicts
+
+
+class RecurbateIE(InfoExtractor):
+    """Extractor for recurbate.com `play.php?video=<id>` pages."""
+
+    # The `id` group captures the numeric video ID from the query string
+    _VALID_URL = r'https?://(?:www\.)?recurbate\.com/play\.php\?video=(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://recurbate.com/play.php?video=39161415',
+        'md5': 'dd2b4ec57aa3e3572cb5cf0997fca99f',
+        'info_dict': {
+            'id': '39161415',
+            'ext': 'mp4',
+            'description': 'md5:db48d09e4d93fc715f47fd3d6b7edd51',
+            'title': 'Performer zsnicole33 show on 2022-10-25 20:23, Chaturbate Archive – Recurbate',
+            'age_limit': 18,
+        },
+        'skip': 'Website require membership.',
+    }]
+
+    def _real_extract(self, url):
+        """Build the info dict for the video page at `url`.
+
+        Page metadata is merged with the first HTML5 media entry parsed from
+        the `api/get.php` response. `raise_login_required` is called when the
+        page request fails with HTTP 403 or the API body is `shall_subscribe`.
+        """
+        SUBSCRIPTION_MISSING_MESSAGE = 'This video is only available for registered users; Set your authenticated browser user agent via the --user-agent parameter.'
+        video_id = self._match_id(url)
+        try:
+            webpage = self._download_webpage(url, video_id)
+        except ExtractorError as e:
+            # A 403 on the page itself is reported as a login requirement
+            if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403:
+                self.raise_login_required(msg=SUBSCRIPTION_MISSING_MESSAGE, method='cookies')
+            raise
+        # The page's data-token attribute is passed on to the get.php API URL
+        token = self._html_search_regex(r'data-token="([^"]+)"', webpage, 'token')
+        video_url = f'https://recurbate.com/api/get.php?video={video_id}&token={token}'
+
+        video_webpage = self._download_webpage(video_url, video_id)
+        # A body of exactly 'shall_subscribe' is treated as a missing subscription
+        if video_webpage == 'shall_subscribe':
+            self.raise_login_required(msg=SUBSCRIPTION_MISSING_MESSAGE, method='cookies')
+        entries = self._parse_html5_media_entries(video_url, video_webpage, video_id)
+        return merge_dicts({
+            'id': video_id,
+            'title': self._html_extract_title(webpage, 'title'),
+            'description': self._og_search_description(webpage),
+            'age_limit': self._rta_search(webpage),
+        }, entries[0])