#55 Add aria2c support for DASH (mpd) and HLS (m3u8)

Co-authored-by: Dan <2660574+shirtjs@users.noreply.github.com>
Co-authored-by: pukkandan <pukkandan@gmail.com>
This commit is contained in:
shirt-dev 2021-02-08 11:46:01 -05:00 committed by GitHub
parent ff84930c86
commit 5219cb3e75
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 179 additions and 35 deletions

View file

@ -1,11 +1,24 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from ..utils import (
determine_protocol,
)
def _get_real_downloader(info_dict, protocol=None, *args, **kwargs):
info_copy = info_dict.copy()
if protocol:
info_copy['protocol'] = protocol
return get_suitable_downloader(info_copy, *args, **kwargs)
# Some of these require _get_real_downloader
from .common import FileDownloader from .common import FileDownloader
from .dash import DashSegmentsFD
from .f4m import F4mFD from .f4m import F4mFD
from .hls import HlsFD from .hls import HlsFD
from .http import HttpFD from .http import HttpFD
from .rtmp import RtmpFD from .rtmp import RtmpFD
from .dash import DashSegmentsFD
from .rtsp import RtspFD from .rtsp import RtspFD
from .ism import IsmFD from .ism import IsmFD
from .youtube_live_chat import YoutubeLiveChatReplayFD from .youtube_live_chat import YoutubeLiveChatReplayFD
@ -14,10 +27,6 @@
FFmpegFD, FFmpegFD,
) )
from ..utils import (
determine_protocol,
)
PROTOCOL_MAP = { PROTOCOL_MAP = {
'rtmp': RtmpFD, 'rtmp': RtmpFD,
'm3u8_native': HlsFD, 'm3u8_native': HlsFD,
@ -31,7 +40,7 @@
} }
def get_suitable_downloader(info_dict, params={}): def get_suitable_downloader(info_dict, params={}, default=HttpFD):
"""Get the downloader class that can handle the info dict.""" """Get the downloader class that can handle the info dict."""
protocol = determine_protocol(info_dict) protocol = determine_protocol(info_dict)
info_dict['protocol'] = protocol info_dict['protocol'] = protocol
@ -45,16 +54,17 @@ def get_suitable_downloader(info_dict, params={}):
if ed.can_download(info_dict): if ed.can_download(info_dict):
return ed return ed
if protocol.startswith('m3u8') and info_dict.get('is_live'): if protocol.startswith('m3u8'):
return FFmpegFD if info_dict.get('is_live'):
return FFmpegFD
elif _get_real_downloader(info_dict, 'frag_urls', params, None):
return HlsFD
elif params.get('hls_prefer_native') is True:
return HlsFD
elif params.get('hls_prefer_native') is False:
return FFmpegFD
if protocol == 'm3u8' and params.get('hls_prefer_native') is True: return PROTOCOL_MAP.get(protocol, default)
return HlsFD
if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False:
return FFmpegFD
return PROTOCOL_MAP.get(protocol, HttpFD)
__all__ = [ __all__ = [

View file

@ -1,6 +1,8 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from ..downloader import _get_real_downloader
from .fragment import FragmentFD from .fragment import FragmentFD
from ..compat import compat_urllib_error from ..compat import compat_urllib_error
from ..utils import ( from ..utils import (
DownloadError, DownloadError,
@ -20,31 +22,42 @@ def real_download(self, filename, info_dict):
fragments = info_dict['fragments'][:1] if self.params.get( fragments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments'] 'test', False) else info_dict['fragments']
real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
ctx = { ctx = {
'filename': filename, 'filename': filename,
'total_frags': len(fragments), 'total_frags': len(fragments),
} }
self._prepare_and_start_frag_download(ctx) if real_downloader:
self._prepare_external_frag_download(ctx)
else:
self._prepare_and_start_frag_download(ctx)
fragment_retries = self.params.get('fragment_retries', 0) fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
fragment_urls = []
frag_index = 0 frag_index = 0
for i, fragment in enumerate(fragments): for i, fragment in enumerate(fragments):
frag_index += 1 frag_index += 1
if frag_index <= ctx['fragment_index']: if frag_index <= ctx['fragment_index']:
continue continue
fragment_url = fragment.get('url')
if not fragment_url:
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
if real_downloader:
fragment_urls.append(fragment_url)
continue
# In DASH, the first segment contains necessary headers to # In DASH, the first segment contains necessary headers to
# generate a valid MP4 file, so always abort for the first segment # generate a valid MP4 file, so always abort for the first segment
fatal = i == 0 or not skip_unavailable_fragments fatal = i == 0 or not skip_unavailable_fragments
count = 0 count = 0
while count <= fragment_retries: while count <= fragment_retries:
try: try:
fragment_url = fragment.get('url')
if not fragment_url:
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
if not success: if not success:
return False return False
@ -75,6 +88,16 @@ def real_download(self, filename, info_dict):
self.report_error('giving up after %s fragment retries' % fragment_retries) self.report_error('giving up after %s fragment retries' % fragment_retries)
return False return False
self._finish_frag_download(ctx) if real_downloader:
info_copy = info_dict.copy()
info_copy['url_list'] = fragment_urls
fd = real_downloader(self.ydl, self.params)
# TODO: Make progress updates work without hooking twice
# for ph in self._progress_hooks:
# fd.add_progress_hook(ph)
success = fd.real_download(filename, info_copy)
if not success:
return False
else:
self._finish_frag_download(ctx)
return True return True

View file

@ -5,6 +5,13 @@
import subprocess import subprocess
import sys import sys
import time import time
import shutil
try:
from Crypto.Cipher import AES
can_decrypt_frag = True
except ImportError:
can_decrypt_frag = False
from .common import FileDownloader from .common import FileDownloader
from ..compat import ( from ..compat import (
@ -18,15 +25,19 @@
cli_bool_option, cli_bool_option,
cli_configuration_args, cli_configuration_args,
encodeFilename, encodeFilename,
error_to_compat_str,
encodeArgument, encodeArgument,
handle_youtubedl_headers, handle_youtubedl_headers,
check_executable, check_executable,
is_outdated_version, is_outdated_version,
process_communicate_or_kill, process_communicate_or_kill,
sanitized_Request,
) )
class ExternalFD(FileDownloader): class ExternalFD(FileDownloader):
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
self.report_destination(filename) self.report_destination(filename)
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
@ -79,7 +90,7 @@ def available(cls):
@classmethod @classmethod
def supports(cls, info_dict): def supports(cls, info_dict):
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps') return info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS
@classmethod @classmethod
def can_download(cls, info_dict): def can_download(cls, info_dict):
@ -109,8 +120,47 @@ def _call_downloader(self, tmpfilename, info_dict):
_, stderr = process_communicate_or_kill(p) _, stderr = process_communicate_or_kill(p)
if p.returncode != 0: if p.returncode != 0:
self.to_stderr(stderr.decode('utf-8', 'replace')) self.to_stderr(stderr.decode('utf-8', 'replace'))
if 'url_list' in info_dict:
file_list = []
for [i, url] in enumerate(info_dict['url_list']):
tmpsegmentname = '%s_%s.frag' % (tmpfilename, i)
file_list.append(tmpsegmentname)
with open(tmpfilename, 'wb') as dest:
for i in file_list:
if 'decrypt_info' in info_dict:
decrypt_info = info_dict['decrypt_info']
with open(i, 'rb') as src:
if decrypt_info['METHOD'] == 'AES-128':
iv = decrypt_info.get('IV')
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
encrypted_data = src.read()
decrypted_data = AES.new(
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(encrypted_data)
dest.write(decrypted_data)
else:
shutil.copyfileobj(open(i, 'rb'), dest)
else:
shutil.copyfileobj(open(i, 'rb'), dest)
if not self.params.get('keep_fragments', False):
for file_path in file_list:
try:
os.remove(file_path)
except OSError as ose:
self.report_error("Unable to delete file %s; %s" % (file_path, error_to_compat_str(ose)))
try:
file_path = '%s.frag.urls' % tmpfilename
os.remove(file_path)
except OSError as ose:
self.report_error("Unable to delete file %s; %s" % (file_path, error_to_compat_str(ose)))
return p.returncode return p.returncode
def _prepare_url(self, info_dict, url):
headers = info_dict.get('http_headers')
return sanitized_Request(url, None, headers) if headers else url
class CurlFD(ExternalFD): class CurlFD(ExternalFD):
AVAILABLE_OPT = '-V' AVAILABLE_OPT = '-V'
@ -186,15 +236,17 @@ def _make_cmd(self, tmpfilename, info_dict):
class Aria2cFD(ExternalFD): class Aria2cFD(ExternalFD):
AVAILABLE_OPT = '-v' AVAILABLE_OPT = '-v'
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'frag_urls')
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c'] cmd = [self.exe, '-c']
cmd += self._configuration_args([
'--min-split-size', '1M', '--max-connection-per-server', '4'])
dn = os.path.dirname(tmpfilename) dn = os.path.dirname(tmpfilename)
if 'url_list' not in info_dict:
cmd += ['--out', os.path.basename(tmpfilename)]
verbose_level_args = ['--console-log-level=warn', '--summary-interval=0']
cmd += self._configuration_args(['--file-allocation=none', '-x16', '-j16', '-s16'] + verbose_level_args)
if dn: if dn:
cmd += ['--dir', dn] cmd += ['--dir', dn]
cmd += ['--out', os.path.basename(tmpfilename)]
if info_dict.get('http_headers') is not None: if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items(): for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)] cmd += ['--header', '%s: %s' % (key, val)]
@ -202,7 +254,21 @@ def _make_cmd(self, tmpfilename, info_dict):
cmd += self._option('--all-proxy', 'proxy') cmd += self._option('--all-proxy', 'proxy')
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=') cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
cmd += ['--', info_dict['url']] cmd += ['--auto-file-renaming=false']
if 'url_list' in info_dict:
cmd += verbose_level_args
cmd += ['--uri-selector', 'inorder', '--download-result=hide']
url_list_file = '%s.frag.urls' % tmpfilename
url_list = []
for [i, url] in enumerate(info_dict['url_list']):
tmpsegmentname = '%s_%s.frag' % (os.path.basename(tmpfilename), i)
url_list.append('%s\n\tout=%s' % (url, tmpsegmentname))
with open(url_list_file, 'w') as f:
f.write('\n'.join(url_list))
cmd += ['-i', url_list_file]
else:
cmd += ['--', info_dict['url']]
return cmd return cmd
@ -221,9 +287,7 @@ def _make_cmd(self, tmpfilename, info_dict):
class FFmpegFD(ExternalFD): class FFmpegFD(ExternalFD):
@classmethod SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
def supports(cls, info_dict):
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
@classmethod @classmethod
def available(cls): def available(cls):

View file

@ -277,3 +277,24 @@ def _finish_frag_download(self, ctx):
'status': 'finished', 'status': 'finished',
'elapsed': elapsed, 'elapsed': elapsed,
}) })
def _prepare_external_frag_download(self, ctx):
if 'live' not in ctx:
ctx['live'] = False
if not ctx['live']:
total_frags_str = '%d' % ctx['total_frags']
ad_frags = ctx.get('ad_frags', 0)
if ad_frags:
total_frags_str += ' (not including %d ad)' % ad_frags
else:
total_frags_str = 'unknown (live)'
self.to_screen(
'[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
tmpfilename = self.temp_name(ctx['filename'])
# Should be initialized before ytdl file check
ctx.update({
'tmpfilename': tmpfilename,
'fragment_index': 0,
})

View file

@ -8,6 +8,7 @@
except ImportError: except ImportError:
can_decrypt_frag = False can_decrypt_frag = False
from ..downloader import _get_real_downloader
from .fragment import FragmentFD from .fragment import FragmentFD
from .external import FFmpegFD from .external import FFmpegFD
@ -73,10 +74,13 @@ def real_download(self, filename, info_dict):
'hlsnative has detected features it does not support, ' 'hlsnative has detected features it does not support, '
'extraction will be delegated to ffmpeg') 'extraction will be delegated to ffmpeg')
fd = FFmpegFD(self.ydl, self.params) fd = FFmpegFD(self.ydl, self.params)
for ph in self._progress_hooks: # TODO: Make progress updates work without hooking twice
fd.add_progress_hook(ph) # for ph in self._progress_hooks:
# fd.add_progress_hook(ph)
return fd.real_download(filename, info_dict) return fd.real_download(filename, info_dict)
real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
def is_ad_fragment_start(s): def is_ad_fragment_start(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
@ -85,6 +89,8 @@ def is_ad_fragment_end(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
fragment_urls = []
media_frags = 0 media_frags = 0
ad_frags = 0 ad_frags = 0
ad_frag_next = False ad_frag_next = False
@ -109,7 +115,10 @@ def is_ad_fragment_end(s):
'ad_frags': ad_frags, 'ad_frags': ad_frags,
} }
self._prepare_and_start_frag_download(ctx) if real_downloader:
self._prepare_external_frag_download(ctx)
else:
self._prepare_and_start_frag_download(ctx)
fragment_retries = self.params.get('fragment_retries', 0) fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
@ -140,6 +149,11 @@ def is_ad_fragment_end(s):
else compat_urlparse.urljoin(man_url, line)) else compat_urlparse.urljoin(man_url, line))
if extra_query: if extra_query:
frag_url = update_url_query(frag_url, extra_query) frag_url = update_url_query(frag_url, extra_query)
if real_downloader:
fragment_urls.append(frag_url)
continue
count = 0 count = 0
headers = info_dict.get('http_headers', {}) headers = info_dict.get('http_headers', {})
if byte_range: if byte_range:
@ -168,6 +182,7 @@ def is_ad_fragment_end(s):
self.report_error( self.report_error(
'giving up after %s fragment retries' % fragment_retries) 'giving up after %s fragment retries' % fragment_retries)
return False return False
if decrypt_info['METHOD'] == 'AES-128': if decrypt_info['METHOD'] == 'AES-128':
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
@ -211,6 +226,17 @@ def is_ad_fragment_end(s):
elif is_ad_fragment_end(line): elif is_ad_fragment_end(line):
ad_frag_next = False ad_frag_next = False
self._finish_frag_download(ctx) if real_downloader:
info_copy = info_dict.copy()
info_copy['url_list'] = fragment_urls
info_copy['decrypt_info'] = decrypt_info
fd = real_downloader(self.ydl, self.params)
# TODO: Make progress updates work without hooking twice
# for ph in self._progress_hooks:
# fd.add_progress_hook(ph)
success = fd.real_download(filename, info_copy)
if not success:
return False
else:
self._finish_frag_download(ctx)
return True return True