[extractor/cda] Support premium and misc improvements (#5529)

* Fix cache for non-ASCII key * Improve error messages * Better UA for fingerprint bypass Authored by: selfisekai
2024-11-07 20:30:41 -05:00 · 2022-12-27 20:57:26 +01:00 · 2022-12-27 20:57:26 +01:00 · da8d2de208
commit da8d2de208
parent 15e9e578c0
2 changed files with 44 additions and 12 deletions
--- a/yt_dlp/cache.py
+++ b/yt_dlp/cache.py
@ -5,6 +5,7 @@
 import re
 import shutil
 import traceback
 import urllib.parse
 from .utils import expand_path, traverse_obj, version_tuple, write_json_file
 from .version import __version__
@ -22,11 +23,9 @@ def _get_root_dir(self):
        return expand_path(res)
    def _get_cache_fn(self, section, key, dtype):
-        assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
+        assert re.match(r'^[\w.-]+$', section), f'invalid section {section!r}'
-            'invalid section %r' % section
+        key = urllib.parse.quote(key, safe='').replace('%', ',')  # encode non-ascii characters
-        assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
+        return os.path.join(self._get_root_dir(), section, f'{key}.{dtype}')
        return os.path.join(
            self._get_root_dir(), section, f'{key}.{dtype}')
    @property
    def enabled(self):
--- a/yt_dlp/extractor/cda.py
+++ b/yt_dlp/extractor/cda.py
@ -4,6 +4,7 @@
 import hashlib
 import hmac
 import json
 import random
 import re
 from .common import InfoExtractor
@ -27,11 +28,10 @@ class CDAIE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
    _NETRC_MACHINE = 'cdapl'
-    _BASE_URL = 'http://www.cda.pl/'
+    _BASE_URL = 'https://www.cda.pl'
    _BASE_API_URL = 'https://api.cda.pl'
    _API_HEADERS = {
        'Accept': 'application/vnd.cda.public+json',
        'User-Agent': 'pl.cda 1.0 (version 1.2.88 build 15306; Android 9; Xiaomi Redmi 3S)',
    }
    # hardcoded in the app
    _LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q'
@ -101,6 +101,38 @@ def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
            }, **kwargs)
    def _perform_login(self, username, password):
        app_version = random.choice((
            '1.2.88 build 15306',
            '1.2.174 build 18469',
        ))
        android_version = random.randrange(8, 14)
        phone_model = random.choice((
            # x-kom.pl top selling Android smartphones, as of 2022-12-26
            # https://www.x-kom.pl/g-4/c/1590-smartfony-i-telefony.html?f201-system-operacyjny=61322-android
            'ASUS ZenFone 8',
            'Motorola edge 20 5G',
            'Motorola edge 30 neo 5G',
            'Motorola moto g22',
            'OnePlus Nord 2T 5G',
            'Samsung Galaxy A32 SM‑A325F',
            'Samsung Galaxy M13',
            'Samsung Galaxy S20 FE 5G',
            'Xiaomi 11T',
            'Xiaomi POCO M4 Pro',
            'Xiaomi Redmi 10',
            'Xiaomi Redmi 10C',
            'Xiaomi Redmi 9C NFC',
            'Xiaomi Redmi Note 10 Pro',
            'Xiaomi Redmi Note 11 Pro',
            'Xiaomi Redmi Note 11',
            'Xiaomi Redmi Note 11S 5G',
            'Xiaomi Redmi Note 11S',
            'realme 10',
            'realme 9 Pro+',
            'vivo Y33s',
        ))
        self._API_HEADERS['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'
        cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {}
        if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5:
            self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}'
@ -138,9 +170,6 @@ def _api_extract(self, video_id):
        meta = self._download_json(
            f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video']
        if meta.get('premium') and not meta.get('premium_free'):
            self.report_drm(video_id)
        uploader = traverse_obj(meta, 'author', 'login')
        formats = [{
@ -151,6 +180,10 @@ def _api_extract(self, video_id):
            'filesize': quality.get('length'),
        } for quality in meta['qualities'] if quality.get('file')]
        if meta.get('premium') and not meta.get('premium_free') and not formats:
            raise ExtractorError(
                'Video requires CDA Premium - subscription needed', expected=True)
        return {
            'id': video_id,
            'title': meta.get('title'),
@ -167,10 +200,10 @@ def _api_extract(self, video_id):
    def _web_extract(self, video_id, url):
        self._set_cookie('cda.pl', 'cda.player', 'html5')
        webpage = self._download_webpage(
-            self._BASE_URL + '/video/' + video_id, video_id)
+            f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)
        if 'Ten film jest dostępny dla użytkowników premium' in webpage:
-            raise ExtractorError('This video is only available for premium users.', expected=True)
+            self.raise_login_required('This video is only available for premium users')
        if re.search(r'niedostępn[ey] w(?:&nbsp;|\s+)Twoim kraju\s*<', webpage):
            self.raise_geo_restricted()