mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-14 20:38:11 -05:00
[ie/generic] Do not impersonate by default (fix edfd095b19) (#11336)
Closes #11335
Authored by: bashonly
This commit is contained in:
parent
ec9b25043f
commit
c29f5a7fae
2 changed files with 29 additions and 9 deletions
|
@ -1791,7 +1791,7 @@ #### generic
|
||||||
* `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg
|
* `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg
|
||||||
* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
|
* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
|
||||||
* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`
|
* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`
|
||||||
* `impersonate`: Target(s) to try and impersonate with the initial webpage request; e.g. `safari,chrome-110`. By default any available target will be used. Use `false` to disable impersonation
|
* `impersonate`: Target(s) to try and impersonate with the initial webpage request; e.g. `generic:impersonate=safari,chrome-110`. Use `generic:impersonate` to impersonate any available target, and use `generic:impersonate=false` to disable impersonation (default)
|
||||||
|
|
||||||
#### funimation
|
#### funimation
|
||||||
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
|
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
|
||||||
|
|
|
@ -8,6 +8,8 @@
|
||||||
from .commonprotocols import RtmpIE
|
from .commonprotocols import RtmpIE
|
||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
from ..compat import compat_etree_fromstring
|
from ..compat import compat_etree_fromstring
|
||||||
|
from ..cookies import LenientSimpleCookie
|
||||||
|
from ..networking.exceptions import HTTPError
|
||||||
from ..networking.impersonate import ImpersonateTarget
|
from ..networking.impersonate import ImpersonateTarget
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
KNOWN_EXTENSIONS,
|
KNOWN_EXTENSIONS,
|
||||||
|
@ -2374,10 +2376,9 @@ def _real_extract(self, url):
|
||||||
else:
|
else:
|
||||||
video_id = self._generic_id(url)
|
video_id = self._generic_id(url)
|
||||||
|
|
||||||
# Try to impersonate a web-browser by default if possible
|
# Do not impersonate by default; see https://github.com/yt-dlp/yt-dlp/issues/11335
|
||||||
# Skip impersonation if not available to omit the warning
|
impersonate = self._configuration_arg('impersonate', ['false'])
|
||||||
impersonate = self._configuration_arg('impersonate', [''])
|
if 'false' in impersonate:
|
||||||
if 'false' in impersonate or not self._downloader._impersonate_target_available(ImpersonateTarget()):
|
|
||||||
impersonate = None
|
impersonate = None
|
||||||
|
|
||||||
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
||||||
|
@ -2388,10 +2389,29 @@ def _real_extract(self, url):
|
||||||
# to accept raw bytes and being able to download only a chunk.
|
# to accept raw bytes and being able to download only a chunk.
|
||||||
# It may probably better to solve this by checking Content-Type for application/octet-stream
|
# It may probably better to solve this by checking Content-Type for application/octet-stream
|
||||||
# after a HEAD request, but not sure if we can rely on this.
|
# after a HEAD request, but not sure if we can rely on this.
|
||||||
|
try:
|
||||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||||
'Accept-Encoding': 'identity',
|
'Accept-Encoding': 'identity',
|
||||||
'Referer': smuggled_data.get('referer'),
|
'Referer': smuggled_data.get('referer'),
|
||||||
}), impersonate=impersonate)
|
}), impersonate=impersonate)
|
||||||
|
except ExtractorError as e:
|
||||||
|
if not (isinstance(e.cause, HTTPError) and e.cause.status == 403
|
||||||
|
and e.cause.response.get_header('cf-mitigated') == 'challenge'
|
||||||
|
and e.cause.response.extensions.get('impersonate') is None):
|
||||||
|
raise
|
||||||
|
cf_cookie_domain = traverse_obj(
|
||||||
|
LenientSimpleCookie(e.cause.response.get_header('set-cookie')),
|
||||||
|
('__cf_bm', 'domain'))
|
||||||
|
if cf_cookie_domain:
|
||||||
|
self.write_debug(f'Clearing __cf_bm cookie for {cf_cookie_domain}')
|
||||||
|
self.cookiejar.clear(domain=cf_cookie_domain, path='/', name='__cf_bm')
|
||||||
|
msg = 'Got HTTP Error 403 caused by Cloudflare anti-bot challenge; '
|
||||||
|
if not self._downloader._impersonate_target_available(ImpersonateTarget()):
|
||||||
|
msg += ('see https://github.com/yt-dlp/yt-dlp#impersonation for '
|
||||||
|
'how to install the required impersonation dependency, and ')
|
||||||
|
raise ExtractorError(
|
||||||
|
f'{msg}try again with --extractor-args "generic:impersonate"', expected=True)
|
||||||
|
|
||||||
new_url = full_response.url
|
new_url = full_response.url
|
||||||
if new_url != extract_basic_auth(url)[0]:
|
if new_url != extract_basic_auth(url)[0]:
|
||||||
self.report_following_redirect(new_url)
|
self.report_following_redirect(new_url)
|
||||||
|
|
Loading…
Reference in a new issue