From c6387abc1af9842bb0541288a5610abba9b1ab51 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 27 Sep 2024 17:46:22 -0500 Subject: [PATCH] [cleanup] Misc (#10807) Closes #10751, Closes #10769, Closes #10791 Authored by: bashonly, Codenade, pzhlkj6612, seproDev, coletdjnz, grqz, Grub4K Co-authored-by: Codenade Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> Co-authored-by: coletdjnz Co-authored-by: N/Ame <173015200+grqz@users.noreply.github.com> Co-authored-by: Simon Sawicki --- .github/workflows/quick-test.yml | 2 +- .github/workflows/release.yml | 4 ++-- README.md | 30 ++++++++++++++-------------- pyproject.toml | 2 +- yt_dlp/downloader/external.py | 4 ++-- yt_dlp/extractor/academicearth.py | 2 +- yt_dlp/extractor/ard.py | 4 ++-- yt_dlp/extractor/callin.py | 2 +- yt_dlp/extractor/common.py | 8 ++++---- yt_dlp/extractor/fc2.py | 2 +- yt_dlp/extractor/generic.py | 4 ++-- yt_dlp/extractor/getcourseru.py | 2 +- yt_dlp/extractor/golem.py | 2 +- yt_dlp/extractor/hrfensehen.py | 2 +- yt_dlp/extractor/japandiet.py | 7 +++++-- yt_dlp/extractor/kaltura.py | 2 +- yt_dlp/extractor/mailru.py | 2 +- yt_dlp/extractor/mgtv.py | 2 +- yt_dlp/extractor/mit.py | 2 +- yt_dlp/extractor/nzonscreen.py | 2 +- yt_dlp/extractor/pornhub.py | 3 +-- yt_dlp/extractor/radiofrance.py | 2 +- yt_dlp/extractor/reverbnation.py | 2 +- yt_dlp/extractor/tele13.py | 2 +- yt_dlp/extractor/twitcasting.py | 2 +- yt_dlp/extractor/viu.py | 2 +- yt_dlp/extractor/ximalaya.py | 2 +- yt_dlp/networking/_websockets.py | 4 ++-- yt_dlp/options.py | 10 +++++----- yt_dlp/postprocessor/sponsorblock.py | 2 +- yt_dlp/utils/_utils.py | 6 +++--- 31 files changed, 63 insertions(+), 61 deletions(-) diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index fe2a7e923..1571d3cab 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -15,7 +15,7 @@ jobs: with: python-version: '3.8' - name: Install test requirements - run: python3 ./devscripts/install_deps.py --include test + run: python3 ./devscripts/install_deps.py -o --include test - name: Run tests timeout-minutes: 15 run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index fa5ad7e51..8d0bc4026 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -204,7 +204,7 @@ jobs: git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com" git add -u git commit -m "Release ${{ env.version }}" \ - -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all :ci run dl" + -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all" git push origin --force ${{ github.event.ref }}:release - name: Get target commitish @@ -325,7 +325,7 @@ jobs: "(https://github.com/yt-dlp/yt-dlp-master-builds/releases/latest \"Master builds\")"' || '' }} > ./RELEASE_NOTES printf '\n\n' >> ./RELEASE_NOTES cat >> ./RELEASE_NOTES << EOF - #### A description of the various files are in the [README](https://github.com/${{ github.repository }}#release-files) + #### A description of the various files is in the [README](https://github.com/${{ github.repository }}#release-files) --- $(python ./devscripts/make_changelog.py -vv --collapsible) EOF diff --git a/README.md b/README.md index 1d6a4a86d..3e76a4efb 100644 --- a/README.md +++ b/README.md @@ -200,7 +200,7 @@ #### Impersonation The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting. -* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE) +* [**curl_cffi**](https://github.com/lexiforest/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lexiforest/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/lexiforest/curl_cffi/blob/main/LICENSE) * Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"` * Currently included in `yt-dlp.exe`, `yt-dlp_linux` and `yt-dlp_macos` builds @@ -459,17 +459,17 @@ ## Video Selection: conditions. Use a "\" to escape "&" or quotes if needed. If used multiple times, the filter matches if at least one of the - conditions is met. E.g. --match-filter - !is_live --match-filter "like_count>?100 & + conditions is met. E.g. --match-filters + !is_live --match-filters "like_count>?100 & description~='(?i)\bcats \& dogs\b'" matches only videos that are not live OR those that have a like count more than 100 (or the like field is not available) and also has a description that contains the phrase "cats & - dogs" (caseless). Use "--match-filter -" to + dogs" (caseless). Use "--match-filters -" to interactively ask whether to download each video - --no-match-filters Do not use any --match-filter (default) + --no-match-filters Do not use any --match-filters (default) --break-match-filters FILTER Same as "--match-filters" but stops the download process when a video is rejected --no-break-match-filters Do not use any --break-match-filters (default) @@ -490,7 +490,7 @@ ## Video Selection: encountering a file that is in the archive (default) --break-per-input Alters --max-downloads, --break-on-existing, - --break-match-filter, and autonumber to + --break-match-filters, and autonumber to reset per input URL --no-break-per-input --break-on-existing and similar options terminates the entire download queue @@ -1771,7 +1771,7 @@ # EXTRACTOR ARGUMENTS #### youtube * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,web_creator` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. +* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,web_creator` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) @@ -2184,9 +2184,9 @@ ### New features * **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata` -* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filter` etc +* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filters` etc -* **Improvements**: Regex and other operators in `--format`/`--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc +* **Improvements**: Regex and other operators in `--format`/`--match-filters`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc * **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details @@ -2227,7 +2227,7 @@ ### Differences in default behavior * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi` * yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior * ~~yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [aria2c](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is~~ -* yt-dlp versions between 2021.09.01 and 2023.01.02 applies `--match-filter` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this +* yt-dlp versions between 2021.09.01 and 2023.01.02 applies `--match-filters` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this * yt-dlp versions between 2021.11.10 and 2023.06.21 estimated `filesize_approx` values for fragmented/manifest formats. This was added for convenience in [f2fe69](https://github.com/yt-dlp/yt-dlp/commit/f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a), but was reverted in [0dff8e](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) due to the potentially extreme inaccuracy of the estimated values. Use `--compat-options manifest-filesize-approx` to keep extracting the estimated values * yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests. * The sub-modules `swfinterp`, `casefold` are removed. @@ -2273,11 +2273,11 @@ #### Redundant options --get-thumbnail --print thumbnail -e, --get-title --print title -g, --get-url --print urls - --match-title REGEX --match-filter "title ~= (?i)REGEX" - --reject-title REGEX --match-filter "title !~= (?i)REGEX" - --min-views COUNT --match-filter "view_count >=? COUNT" - --max-views COUNT --match-filter "view_count <=? COUNT" - --break-on-reject Use --break-match-filter + --match-title REGEX --match-filters "title ~= (?i)REGEX" + --reject-title REGEX --match-filters "title !~= (?i)REGEX" + --min-views COUNT --match-filters "view_count >=? COUNT" + --max-views COUNT --match-filters "view_count <=? COUNT" + --break-on-reject Use --break-match-filters --user-agent UA --add-header "User-Agent:UA" --referer URL --add-header "Referer:URL" --playlist-start NUMBER -I NUMBER: diff --git a/pyproject.toml b/pyproject.toml index 18d9a0a3a..f54980d57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,7 +76,7 @@ dev = [ ] static-analysis = [ "autopep8~=2.0", - "ruff~=0.5.0", + "ruff~=0.6.0", ] test = [ "pytest~=8.1", diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index ae2372915..6c1ec403c 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -508,7 +508,7 @@ def _call_downloader(self, tmpfilename, info_dict): env = None proxy = self.params.get('proxy') if proxy: - if not re.match(r'^[\da-zA-Z]+://', proxy): + if not re.match(r'[\da-zA-Z]+://', proxy): proxy = f'http://{proxy}' if proxy.startswith('socks'): @@ -559,7 +559,7 @@ def _call_downloader(self, tmpfilename, info_dict): selected_formats = info_dict.get('requested_formats') or [info_dict] for i, fmt in enumerate(selected_formats): - is_http = re.match(r'^https?://', fmt['url']) + is_http = re.match(r'https?://', fmt['url']) cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else [] if cookies: args.extend(['-cookies', ''.join( diff --git a/yt_dlp/extractor/academicearth.py b/yt_dlp/extractor/academicearth.py index d9691cb5c..b997a0288 100644 --- a/yt_dlp/extractor/academicearth.py +++ b/yt_dlp/extractor/academicearth.py @@ -4,7 +4,7 @@ class AcademicEarthCourseIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P[^?#/]+)' + _VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P[^?#/]+)' IE_NAME = 'AcademicEarth:Course' _TEST = { 'url': 'http://academicearth.org/playlists/laws-of-nature/', diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index 6fd641347..efc79dd14 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -231,7 +231,7 @@ def _real_extract(self, url): class ARDBetaMediathekIE(InfoExtractor): IE_NAME = 'ARDMediathek' - _VALID_URL = r'''(?x)https:// + _VALID_URL = r'''(?x)https?:// (?:(?:beta|www)\.)?ardmediathek\.de/ (?:[^/]+/)? (?:player|live|video)/ @@ -470,7 +470,7 @@ def _real_extract(self, url): class ARDMediathekCollectionIE(InfoExtractor): - _VALID_URL = r'''(?x)https:// + _VALID_URL = r'''(?x)https?:// (?:(?:beta|www)\.)?ardmediathek\.de/ (?:[^/?#]+/)? (?Psendung|serie|sammlung)/ diff --git a/yt_dlp/extractor/callin.py b/yt_dlp/extractor/callin.py index b7061a7d1..ee2e56f8e 100644 --- a/yt_dlp/extractor/callin.py +++ b/yt_dlp/extractor/callin.py @@ -3,7 +3,7 @@ class CallinIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P[-a-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P[-a-zA-Z]+)' _TESTS = [{ 'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc', 'info_dict': { diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 9501e5ec9..486a4ea3c 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2077,7 +2077,7 @@ def _parse_m3u8_formats_and_subtitles( has_drm = HlsFD._has_drm(m3u8_doc) def format_url(url): - return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url) + return url if re.match(r'https?://', url) else urllib.parse.urljoin(m3u8_url, url) if self.get_param('hls_split_discontinuity', False): def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None): @@ -2812,11 +2812,11 @@ def extract_Initialization(source): base_url_e = element.find(_add_ns('BaseURL')) if try_call(lambda: base_url_e.text) is not None: base_url = base_url_e.text + base_url - if re.match(r'^https?://', base_url): + if re.match(r'https?://', base_url): break if mpd_base_url and base_url.startswith('/'): base_url = urllib.parse.urljoin(mpd_base_url, base_url) - elif mpd_base_url and not re.match(r'^https?://', base_url): + elif mpd_base_url and not re.match(r'https?://', base_url): if not mpd_base_url.endswith('/'): mpd_base_url += '/' base_url = mpd_base_url + base_url @@ -2906,7 +2906,7 @@ def prepare_template(template_name, identifiers): } def location_key(location): - return 'url' if re.match(r'^https?://', location) else 'path' + return 'url' if re.match(r'https?://', location) else 'path' if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info: diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py index eac70f6a9..f7b883155 100644 --- a/yt_dlp/extractor/fc2.py +++ b/yt_dlp/extractor/fc2.py @@ -14,7 +14,7 @@ class FC2IE(InfoExtractor): - _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P[^/]+)' + _VALID_URL = r'(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P[^/]+)' IE_NAME = 'fc2' _NETRC_MACHINE = 'fc2' _TESTS = [{ diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 04cffaa86..592800287 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2340,7 +2340,7 @@ def _real_extract(self, url): default_search = 'fixup_error' if default_search in ('auto', 'auto_warning', 'fixup_error'): - if re.match(r'^[^\s/]+\.[^\s/]+/', url): + if re.match(r'[^\s/]+\.[^\s/]+/', url): self.report_warning('The url doesn\'t specify the protocol, trying with http') return self.url_result('http://' + url) elif default_search != 'fixup_error': @@ -2400,7 +2400,7 @@ def _real_extract(self, url): # Check for direct link to a video content_type = full_response.headers.get('Content-Type', '').lower() - m = re.match(r'^(?Paudio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P[^;\s]+)', content_type) + m = re.match(r'(?Paudio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P[^;\s]+)', content_type) if m: self.report_detected('direct video link') headers = filter_dict({'Referer': smuggled_data.get('referer')}) diff --git a/yt_dlp/extractor/getcourseru.py b/yt_dlp/extractor/getcourseru.py index 53b881011..b7581d77e 100644 --- a/yt_dlp/extractor/getcourseru.py +++ b/yt_dlp/extractor/getcourseru.py @@ -52,7 +52,7 @@ class GetCourseRuIE(InfoExtractor): _BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})' _VALID_URL = [ rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P[^?#]+)', - rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P\d+)', + rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P\d+)', ] _TESTS = [{ 'url': 'http://academymel.online/3video_1', diff --git a/yt_dlp/extractor/golem.py b/yt_dlp/extractor/golem.py index 90d2fe6c2..964bf6519 100644 --- a/yt_dlp/extractor/golem.py +++ b/yt_dlp/extractor/golem.py @@ -7,7 +7,7 @@ class GolemIE(InfoExtractor): - _VALID_URL = r'^https?://video\.golem\.de/.+?/(?P.+?)/' + _VALID_URL = r'https?://video\.golem\.de/.+?/(?P.+?)/' _TEST = { 'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html', 'md5': 'c1a2c0a3c863319651c7c992c5ee29bf', diff --git a/yt_dlp/extractor/hrfensehen.py b/yt_dlp/extractor/hrfensehen.py index 17673d5b8..b5a7b14a5 100644 --- a/yt_dlp/extractor/hrfensehen.py +++ b/yt_dlp/extractor/hrfensehen.py @@ -13,7 +13,7 @@ class HRFernsehenIE(InfoExtractor): IE_NAME = 'hrfernsehen' - _VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P[0-9]{6})\.html' + _VALID_URL = r'https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P[0-9]{6})\.html' _TESTS = [{ 'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html', 'md5': '5c4e0ba94677c516a2f65a84110fc536', diff --git a/yt_dlp/extractor/japandiet.py b/yt_dlp/extractor/japandiet.py index 2ef091aff..994da22ae 100644 --- a/yt_dlp/extractor/japandiet.py +++ b/yt_dlp/extractor/japandiet.py @@ -194,11 +194,14 @@ def _real_extract(self, url): class SangiinInstructionIE(InfoExtractor): - _VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php' + _VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php' IE_DESC = False # this shouldn't be listed as a supported site def _real_extract(self, url): - raise ExtractorError('Copy the link from the botton below the video description or player, and use the link to download. If there are no button in the frame, get the URL of the frame showing the video.', expected=True) + raise ExtractorError( + 'Copy the link from the button below the video description/player ' + 'and use that link to download. If there is no button in the frame, ' + 'get the URL of the frame showing the video.', expected=True) class SangiinIE(InfoExtractor): diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py index e5737b1e9..6d51e32f6 100644 --- a/yt_dlp/extractor/kaltura.py +++ b/yt_dlp/extractor/kaltura.py @@ -22,7 +22,7 @@ class KalturaIE(InfoExtractor): (?: kaltura:(?P\w+):(?P\w+)(?::(?P\w+))?| https?:// - (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/ + (?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/ (?: (?: # flash player diff --git a/yt_dlp/extractor/mailru.py b/yt_dlp/extractor/mailru.py index cca678f14..0496a87f0 100644 --- a/yt_dlp/extractor/mailru.py +++ b/yt_dlp/extractor/mailru.py @@ -126,7 +126,7 @@ def _real_extract(self, url): video_data = None # fix meta_url if missing the host address - if re.match(r'^\/\+\/', meta_url): + if re.match(r'\/\+\/', meta_url): meta_url = urljoin('https://my.mail.ru', meta_url) if meta_url: diff --git a/yt_dlp/extractor/mgtv.py b/yt_dlp/extractor/mgtv.py index d5dda06f9..c793626fd 100644 --- a/yt_dlp/extractor/mgtv.py +++ b/yt_dlp/extractor/mgtv.py @@ -16,7 +16,7 @@ class MGTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P\d+)\.html' + _VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/[bv]/(?:[^/]+/)*(?P\d+)\.html' IE_DESC = '芒果TV' IE_NAME = 'MangoTV' diff --git a/yt_dlp/extractor/mit.py b/yt_dlp/extractor/mit.py index e75c540a2..66c3b0793 100644 --- a/yt_dlp/extractor/mit.py +++ b/yt_dlp/extractor/mit.py @@ -65,7 +65,7 @@ def _real_extract(self, url): class OCWMITIE(InfoExtractor): IE_NAME = 'ocw.mit.edu' - _VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P[a-z0-9\-]+)' + _VALID_URL = r'https?://ocw\.mit\.edu/courses/(?P[a-z0-9\-]+)' _BASE_URL = 'http://ocw.mit.edu/' _TESTS = [ diff --git a/yt_dlp/extractor/nzonscreen.py b/yt_dlp/extractor/nzonscreen.py index 5fc516daf..755039804 100644 --- a/yt_dlp/extractor/nzonscreen.py +++ b/yt_dlp/extractor/nzonscreen.py @@ -10,7 +10,7 @@ class NZOnScreenIE(InfoExtractor): - _VALID_URL = r'^https?://www\.nzonscreen\.com/title/(?P[^/?#]+)' + _VALID_URL = r'https?://www\.nzonscreen\.com/title/(?P[^/?#]+)' _TESTS = [{ 'url': 'https://www.nzonscreen.com/title/shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982', 'info_dict': { diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py index 679dc6323..e1e9777e8 100644 --- a/yt_dlp/extractor/pornhub.py +++ b/yt_dlp/extractor/pornhub.py @@ -628,8 +628,7 @@ def is_404(e): page_entries = self._extract_entries(webpage, host) if not page_entries: break - for e in page_entries: - yield e + yield from page_entries if not self._has_more(webpage): break diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py index ff2196354..9d9043984 100644 --- a/yt_dlp/extractor/radiofrance.py +++ b/yt_dlp/extractor/radiofrance.py @@ -16,7 +16,7 @@ class RadioFranceIE(InfoExtractor): - _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P[^?#]+)' + _VALID_URL = r'https?://maison\.radiofrance\.fr/radiovisions/(?P[^?#]+)' IE_NAME = 'radiofrance' _TEST = { diff --git a/yt_dlp/extractor/reverbnation.py b/yt_dlp/extractor/reverbnation.py index ddf8c3753..f3bcc2c32 100644 --- a/yt_dlp/extractor/reverbnation.py +++ b/yt_dlp/extractor/reverbnation.py @@ -6,7 +6,7 @@ class ReverbNationIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P\d+).*?$' + _VALID_URL = r'https?://(?:www\.)?reverbnation\.com/.*?/song/(?P\d+).*?$' _TESTS = [{ 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645', diff --git a/yt_dlp/extractor/tele13.py b/yt_dlp/extractor/tele13.py index c5ca208fb..0d721773e 100644 --- a/yt_dlp/extractor/tele13.py +++ b/yt_dlp/extractor/tele13.py @@ -8,7 +8,7 @@ class Tele13IE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P[\w-]+)' + _VALID_URL = r'https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P[\w-]+)' _TESTS = [ { 'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda', diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index 53b408469..bf9c6348c 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -270,7 +270,7 @@ def _real_extract(self, url): class TwitCastingUserIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P[^/?#]+)/(:?show|archive)/?(?:[#?]|$)' + _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P[^/?#]+)/(?:show|archive)/?(?:[#?]|$)' _TESTS = [{ 'url': 'https://twitcasting.tv/natsuiromatsuri/archive/', 'info_dict': { diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py index 01e59352b..f4ed96bf6 100644 --- a/yt_dlp/extractor/viu.py +++ b/yt_dlp/extractor/viu.py @@ -90,7 +90,7 @@ def _real_extract(self, url): formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4') for key, value in video_data.items(): - mobj = re.match(r'^subtitle_(?P[^_]+)_(?P(vtt|srt))', key) + mobj = re.match(r'subtitle_(?P[^_]+)_(?P(vtt|srt))', key) if not mobj: continue subtitles.setdefault(mobj.group('lang'), []).append({ diff --git a/yt_dlp/extractor/ximalaya.py b/yt_dlp/extractor/ximalaya.py index d63964a00..02bf6a7be 100644 --- a/yt_dlp/extractor/ximalaya.py +++ b/yt_dlp/extractor/ximalaya.py @@ -21,7 +21,7 @@ class XimalayaBaseIE(InfoExtractor): class XimalayaIE(XimalayaBaseIE): IE_NAME = 'ximalaya' IE_DESC = '喜马拉雅FM' - _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(:?(?P\d+)/)?sound/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?:(?P\d+)/)?sound/(?P[0-9]+)' _TESTS = [ { 'url': 'http://www.ximalaya.com/sound/47740352/', diff --git a/yt_dlp/networking/_websockets.py b/yt_dlp/networking/_websockets.py index 21b765b91..ec55567da 100644 --- a/yt_dlp/networking/_websockets.py +++ b/yt_dlp/networking/_websockets.py @@ -33,8 +33,8 @@ import websockets.version websockets_version = tuple(map(int_or_none, websockets.version.version.split('.'))) -if websockets_version < (12, 0): - raise ImportError('Only websockets>=12.0 is supported') +if websockets_version < (13, 0): + raise ImportError('Only websockets>=13.0 is supported') import websockets.sync.client from websockets.uri import parse_uri diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 8077d5d88..9980b7fc3 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -647,16 +647,16 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): 'You can also simply specify a field to match if the field is present, ' 'use "!field" to check if the field is not present, and "&" to check multiple conditions. ' 'Use a "\\" to escape "&" or quotes if needed. If used multiple times, ' - 'the filter matches if at least one of the conditions is met. E.g. --match-filter ' - '!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" ' + 'the filter matches if at least one of the conditions is met. E.g. --match-filters ' + '!is_live --match-filters "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" ' 'matches only videos that are not live OR those that have a like count more than 100 ' '(or the like field is not available) and also has a description ' 'that contains the phrase "cats & dogs" (caseless). ' - 'Use "--match-filter -" to interactively ask whether to download each video')) + 'Use "--match-filters -" to interactively ask whether to download each video')) selection.add_option( '--no-match-filters', dest='match_filter', action='store_const', const=None, - help='Do not use any --match-filter (default)') + help='Do not use any --match-filters (default)') selection.add_option( '--break-match-filters', metavar='FILTER', dest='breaking_match_filter', action='append', @@ -704,7 +704,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): selection.add_option( '--break-per-input', action='store_true', dest='break_per_url', default=False, - help='Alters --max-downloads, --break-on-existing, --break-match-filter, and autonumber to reset per input URL') + help='Alters --max-downloads, --break-on-existing, --break-match-filters, and autonumber to reset per input URL') selection.add_option( '--no-break-per-input', action='store_false', dest='break_per_url', diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py index 6cf9ab62e..b3fc8b54a 100644 --- a/yt_dlp/postprocessor/sponsorblock.py +++ b/yt_dlp/postprocessor/sponsorblock.py @@ -33,7 +33,7 @@ class SponsorBlockPP(FFmpegPostProcessor): def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'): FFmpegPostProcessor.__init__(self, downloader) self._categories = tuple(categories or self.CATEGORIES.keys()) - self._API_URL = api if re.match('^https?://', api) else 'https://' + api + self._API_URL = api if re.match('https?://', api) else 'https://' + api def run(self, info): extractor = info['extractor_key'] diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 184794f95..e1b3c48d6 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1954,7 +1954,7 @@ def urljoin(base, path): path = path.decode() if not isinstance(path, str) or not path: return None - if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path): + if re.match(r'(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path): return path if isinstance(base, bytes): base = base.decode() @@ -2007,7 +2007,7 @@ def url_or_none(url): if not url or not isinstance(url, str): return None url = url.strip() - return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None + return url if re.match(r'(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None def strftime_or_none(timestamp, date_format='%Y%m%d', default=None): @@ -3113,7 +3113,7 @@ def is_html(first_bytes): while first_bytes.startswith(bom): encoding, first_bytes = enc, first_bytes[len(bom):] - return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace')) + return re.match(r'\s*<', first_bytes.decode(encoding, 'replace')) def determine_protocol(info_dict):