[core] Allow extractors to mark formats as potentially DRM (#7396)

This is useful for HLS, where detecting whether the format is
actually DRM-protected requires the child manifest to be downloaded.

This makes the error message when using `--test` inconsistent,
but that doesn't really matter.
This commit is contained in:
pukkandan 2023-07-06 18:39:50 +05:30
parent 906c0bdcd8
commit bc344cd456
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
3 changed files with 46 additions and 31 deletions

View file

@ -983,6 +983,7 @@ def trouble(self, message=None, tb=None, is_error=True):
ID='green', ID='green',
DELIM='blue', DELIM='blue',
ERROR='red', ERROR='red',
BAD_FORMAT='light red',
WARNING='yellow', WARNING='yellow',
SUPPRESS='light black', SUPPRESS='light black',
) )
@ -2085,8 +2086,6 @@ def syntax_error(note, start):
allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False), allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
'video': self.params.get('allow_multiple_video_streams', False)} 'video': self.params.get('allow_multiple_video_streams', False)}
check_formats = self.params.get('check_formats') == 'selected'
def _parse_filter(tokens): def _parse_filter(tokens):
filter_parts = [] filter_parts = []
for type, string_, start, _, _ in tokens: for type, string_, start, _, _ in tokens:
@ -2259,10 +2258,19 @@ def _merge(formats_pair):
return new_dict return new_dict
def _check_formats(formats): def _check_formats(formats):
if not check_formats: if (self.params.get('check_formats') is not None
or self.params.get('allow_unplayable_formats')):
yield from formats yield from formats
return return
yield from self._check_formats(formats) elif self.params.get('check_formats') == 'selected':
yield from self._check_formats(formats)
return
for f in formats:
if f.get('has_drm'):
yield from self._check_formats([f])
else:
yield f
def _build_selector_function(selector): def _build_selector_function(selector):
if isinstance(selector, list): # , if isinstance(selector, list): # ,
@ -2614,10 +2622,10 @@ def sanitize_numeric_fields(info):
if field_preference: if field_preference:
info_dict['_format_sort_fields'] = field_preference info_dict['_format_sort_fields'] = field_preference
# or None ensures --clean-infojson removes it info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
if not self.params.get('allow_unplayable_formats'): if not self.params.get('allow_unplayable_formats'):
formats = [f for f in formats if not f.get('has_drm')] formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats): if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
self.report_warning( self.report_warning(
@ -3719,14 +3727,13 @@ def simplified_codec(f, field):
simplified_codec(f, 'acodec'), simplified_codec(f, 'acodec'),
format_field(f, 'abr', '\t%dk', func=round), format_field(f, 'abr', '\t%dk', func=round),
format_field(f, 'asr', '\t%s', func=format_decimal_suffix), format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
join_nonempty( join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None, self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
self._format_out('DRM', 'light red') if f.get('has_drm') else None, (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
format_field(f, 'language', '[%s]'), else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
join_nonempty(format_field(f, 'format_note'), format_field(f, 'format_note'),
format_field(f, 'container', ignore=(None, f.get('ext'))), format_field(f, 'container', ignore=(None, f.get('ext'))),
delim=', '), delim=', '), delim=' '),
delim=' '),
] for f in formats if f.get('preference') is None or f['preference'] >= -1000] ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
header_line = self._list_format_headers( header_line = self._list_format_headers(
'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO', 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',

View file

@ -28,7 +28,16 @@ class HlsFD(FragmentFD):
FD_NAME = 'hlsnative' FD_NAME = 'hlsnative'
@staticmethod @staticmethod
def can_download(manifest, info_dict, allow_unplayable_formats=False): def _has_drm(manifest): # TODO: https://github.com/yt-dlp/yt-dlp/pull/5039
return bool(re.search('|'.join((
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.apple\.streamingkeydelivery"', # Apple FairPlay
r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.microsoft\.playready"', # Microsoft PlayReady
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
)), manifest))
@classmethod
def can_download(cls, manifest, info_dict, allow_unplayable_formats=False):
UNSUPPORTED_FEATURES = [ UNSUPPORTED_FEATURES = [
# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
@ -50,13 +59,15 @@ def can_download(manifest, info_dict, allow_unplayable_formats=False):
] ]
if not allow_unplayable_formats: if not allow_unplayable_formats:
UNSUPPORTED_FEATURES += [ UNSUPPORTED_FEATURES += [
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1], but not necessarily DRM
] ]
def check_results(): def check_results():
yield not info_dict.get('is_live') yield not info_dict.get('is_live')
for feature in UNSUPPORTED_FEATURES: for feature in UNSUPPORTED_FEATURES:
yield not re.search(feature, manifest) yield not re.search(feature, manifest)
if not allow_unplayable_formats:
yield not cls._has_drm(manifest)
return all(check_results()) return all(check_results())
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
@ -81,14 +92,13 @@ def real_download(self, filename, info_dict):
message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, ' message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command') f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
if not can_download: if not can_download:
has_drm = re.search('|'.join([ if self._has_drm(s) and not self.params.get('allow_unplayable_formats'):
r'#EXT-X-FAXS-CM:', # Adobe Flash Access if info_dict.get('has_drm') and self.params.get('test'):
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay self.to_screen(f'[{self.FD_NAME}] This format is DRM protected', skip_eol=True)
]), s) else:
if has_drm and not self.params.get('allow_unplayable_formats'): self.report_error(
self.report_error( 'This format is DRM protected; Try selecting another format with --format or '
'This video is DRM protected; Try selecting another format with --format or ' 'add --check-formats to automatically fallback to the next best format', tb=False)
'add --check-formats to automatically fallback to the next best format')
return False return False
message = message or 'Unsupported features have been detected' message = message or 'Unsupported features have been detected'
fd = FFmpegFD(self.ydl, self.params) fd = FFmpegFD(self.ydl, self.params)

View file

@ -26,6 +26,7 @@
from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
from ..cookies import LenientSimpleCookie from ..cookies import LenientSimpleCookie
from ..downloader.f4m import get_base_url, remove_encrypted_media from ..downloader.f4m import get_base_url, remove_encrypted_media
from ..downloader.hls import HlsFD
from ..utils import ( from ..utils import (
IDENTITY, IDENTITY,
JSON_LD_RE, JSON_LD_RE,
@ -224,7 +225,8 @@ class InfoExtractor:
width : height ratio as float. width : height ratio as float.
* no_resume The server does not support resuming the * no_resume The server does not support resuming the
(HTTP or RTMP) download. Boolean. (HTTP or RTMP) download. Boolean.
* has_drm The format has DRM and cannot be downloaded. Boolean * has_drm True if the format has DRM and cannot be downloaded.
'maybe' if the format may have DRM and has to be tested before download.
* extra_param_to_segment_url A query string to append to each * extra_param_to_segment_url A query string to append to each
fragment's URL, or to update each existing query string fragment's URL, or to update each existing query string
with. Only applied by the native HLS/DASH downloaders. with. Only applied by the native HLS/DASH downloaders.
@ -1979,11 +1981,7 @@ def _parse_m3u8_formats_and_subtitles(
errnote=None, fatal=True, data=None, headers={}, query={}, errnote=None, fatal=True, data=None, headers={}, query={},
video_id=None): video_id=None):
formats, subtitles = [], {} formats, subtitles = [], {}
has_drm = HlsFD._has_drm(m3u8_doc)
has_drm = re.search('|'.join([
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
]), m3u8_doc)
def format_url(url): def format_url(url):
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url) return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)