2021-06-03 05:43:42 -04:00
#!/usr/bin/env python3
2016-10-02 07:39:18 -04:00
# coding: utf-8
2013-06-18 16:14:21 -04:00
2014-01-04 19:52:03 -05:00
from __future__ import absolute_import , unicode_literals
2013-06-18 16:14:21 -04:00
2013-12-09 16:00:42 -05:00
import collections
2015-03-01 05:46:57 -05:00
import contextlib
2016-07-15 13:55:43 -04:00
import copy
2014-03-13 10:30:25 -04:00
import datetime
2013-10-05 22:27:09 -04:00
import errno
2015-03-01 05:46:57 -05:00
import fileinput
2013-06-18 16:14:21 -04:00
import io
2014-12-06 08:02:19 -05:00
import itertools
2013-11-20 00:18:24 -05:00
import json
2014-03-30 00:02:41 -04:00
import locale
2015-01-22 18:04:05 -05:00
import operator
2013-06-18 16:14:21 -04:00
import os
2013-11-22 13:57:52 -05:00
import platform
2013-06-18 16:14:21 -04:00
import re
import shutil
2013-11-22 13:57:52 -05:00
import subprocess
2013-06-18 16:14:21 -04:00
import sys
2021-06-12 11:18:06 -04:00
import tempfile
2013-06-18 16:14:21 -04:00
import time
2015-06-28 16:08:29 -04:00
import tokenize
2013-06-18 16:14:21 -04:00
import traceback
2017-01-31 04:03:31 -05:00
import random
2013-06-18 16:14:21 -04:00
2017-07-14 20:02:14 -04:00
from string import ascii_letters
2021-02-14 12:10:54 -05:00
from zipimport import zipimporter
2017-07-14 20:02:14 -04:00
2014-11-02 05:23:40 -05:00
from . compat import (
2015-11-19 16:08:34 -05:00
compat_basestring ,
2015-02-28 15:42:16 -05:00
compat_get_terminal_size ,
2014-12-14 19:06:25 -05:00
compat_kwargs ,
2016-03-05 16:52:42 -05:00
compat_numeric_types ,
2016-03-03 06:24:24 -05:00
compat_os_name ,
2021-09-17 15:21:27 -04:00
compat_pycrypto_AES ,
2021-07-28 22:56:17 -04:00
compat_shlex_quote ,
2013-11-17 10:47:52 -05:00
compat_str ,
2015-06-28 16:08:29 -04:00
compat_tokenize_tokenize ,
2013-11-17 10:47:52 -05:00
compat_urllib_error ,
compat_urllib_request ,
2015-10-17 11:16:40 -04:00
compat_urllib_request_DataHandler ,
2014-11-02 05:23:40 -05:00
)
2021-07-21 16:32:49 -04:00
from . cookies import load_cookies
2014-11-02 05:23:40 -05:00
from . utils import (
2016-03-26 09:40:33 -04:00
age_restricted ,
args_to_str ,
2013-11-17 10:47:52 -05:00
ContentTooShortError ,
date_from_str ,
DateRange ,
2014-04-30 04:02:03 -04:00
DEFAULT_OUTTMPL ,
2013-11-17 10:47:52 -05:00
determine_ext ,
2016-01-15 23:10:28 -05:00
determine_protocol ,
2020-10-27 06:37:21 -04:00
DOT_DESKTOP_LINK_TEMPLATE ,
DOT_URL_LINK_TEMPLATE ,
DOT_WEBLOC_LINK_TEMPLATE ,
2013-11-17 10:47:52 -05:00
DownloadError ,
2015-12-19 19:29:36 -05:00
encode_compat_str ,
2013-11-17 10:47:52 -05:00
encodeFilename ,
2021-03-23 15:45:53 -04:00
EntryNotInPlaylist ,
2021-05-17 08:23:08 -04:00
error_to_compat_str ,
2021-01-12 20:01:01 -05:00
ExistingVideoReached ,
2017-03-25 15:31:16 -04:00
expand_path ,
2013-11-17 10:47:52 -05:00
ExtractorError ,
2021-02-02 16:15:00 -05:00
float_or_none ,
2013-11-24 21:12:26 -05:00
format_bytes ,
2020-12-13 09:29:09 -05:00
format_field ,
2021-07-28 19:49:26 -04:00
STR_FORMAT_RE_TMPL ,
STR_FORMAT_TYPES ,
2013-12-15 22:15:10 -05:00
formatSeconds ,
2017-02-04 06:49:58 -05:00
GeoRestrictedError ,
2021-06-12 11:21:00 -04:00
HEADRequest ,
2017-06-08 11:53:14 -04:00
int_or_none ,
2020-10-27 06:37:21 -04:00
iri_to_uri ,
2017-02-04 06:49:58 -05:00
ISO3166Utils ,
2021-05-28 12:37:11 -04:00
LazyList ,
2013-11-17 10:47:52 -05:00
locked_file ,
2021-01-23 07:18:12 -05:00
make_dir ,
2013-11-22 13:57:52 -05:00
make_HTTPS_handler ,
2013-11-17 10:47:52 -05:00
MaxDownloadsReached ,
2021-05-04 13:06:18 -04:00
network_exceptions ,
[YoutubeDL] Ignore duplicates in --playlist-items
E.g. '--playlist-items 2-4,3-4,3' should result in '[2,3,4]', not '[2,3,4,3,4,3]'
2017-10-06 12:46:57 -04:00
orderedSet ,
2021-05-17 08:23:08 -04:00
OUTTMPL_TYPES ,
2014-01-20 05:36:47 -05:00
PagedList ,
2015-01-22 18:04:05 -05:00
parse_filesize ,
2015-03-02 18:03:06 -05:00
PerRequestProxyHandler ,
2013-11-22 13:57:52 -05:00
platform_name ,
2016-03-26 09:40:33 -04:00
PostProcessingError ,
2013-11-17 10:47:52 -05:00
preferredencoding ,
2016-03-26 09:40:33 -04:00
prepend_extension ,
2021-05-17 08:23:08 -04:00
process_communicate_or_kill ,
2016-05-03 03:15:32 -04:00
register_socks_protocols ,
2021-05-17 08:23:08 -04:00
RejectedVideoReached ,
2015-01-24 20:38:47 -05:00
render_table ,
2016-03-26 09:40:33 -04:00
replace_extension ,
2013-11-17 10:47:52 -05:00
SameFileError ,
sanitize_filename ,
2015-03-08 10:57:30 -04:00
sanitize_path ,
2016-03-26 09:37:41 -04:00
sanitize_url ,
2015-11-20 09:33:49 -05:00
sanitized_Request ,
2015-01-24 12:52:26 -05:00
std_headers ,
2019-02-07 13:08:48 -05:00
str_or_none ,
2021-02-02 16:15:00 -05:00
strftime_or_none ,
2013-11-17 10:47:52 -05:00
subtitles_filename ,
2021-06-22 19:11:09 -04:00
ThrottledDownload ,
2020-10-27 06:37:21 -04:00
to_high_limit_path ,
2021-06-08 04:53:56 -04:00
traverse_obj ,
2021-06-24 10:38:43 -04:00
try_get ,
2013-11-17 10:47:52 -05:00
UnavailableVideoError ,
2013-12-16 22:13:36 -05:00
url_basename ,
2021-07-28 22:56:17 -04:00
variadic ,
2015-01-10 15:02:27 -05:00
version_tuple ,
2013-11-17 10:47:52 -05:00
write_json_file ,
write_string ,
2015-09-05 20:21:33 -04:00
YoutubeDLCookieProcessor ,
2013-11-22 13:57:52 -05:00
YoutubeDLHandler ,
2020-02-29 07:08:44 -05:00
YoutubeDLRedirectHandler ,
2013-11-17 10:47:52 -05:00
)
2014-09-03 06:41:05 -04:00
from . cache import Cache
2021-04-10 11:08:33 -04:00
from . extractor import (
gen_extractor_classes ,
get_info_extractor ,
_LAZY_LOADER ,
_PLUGIN_CLASSES
)
2017-09-23 13:08:27 -04:00
from . extractor . openload import PhantomJSwrapper
2021-04-10 11:08:33 -04:00
from . downloader import (
2021-07-31 06:51:01 -04:00
FFmpegFD ,
2021-04-10 11:08:33 -04:00
get_suitable_downloader ,
shorten_protocol_name
)
2014-11-02 04:55:36 -05:00
from . downloader . rtmp import rtmpdump_version
2014-12-14 19:06:25 -05:00
from . postprocessor import (
2021-06-21 13:23:17 -04:00
get_postprocessor ,
FFmpegFixupDurationPP ,
2016-03-01 15:08:50 -05:00
FFmpegFixupM3u8PP ,
2015-01-23 12:39:12 -05:00
FFmpegFixupM4aPP ,
2015-01-09 23:45:51 -05:00
FFmpegFixupStretchedPP ,
2021-06-21 13:23:17 -04:00
FFmpegFixupTimestampPP ,
2014-12-14 19:06:25 -05:00
FFmpegMergerPP ,
FFmpegPostProcessor ,
2021-01-23 07:18:12 -05:00
MoveFilesAfterDownloadPP ,
2014-12-14 19:06:25 -05:00
)
2013-11-22 13:57:52 -05:00
from . version import __version__
2013-06-18 16:14:21 -04:00
2016-03-03 06:24:24 -05:00
if compat_os_name == ' nt ' :
import ctypes
2020-09-18 09:35:21 -04:00
2013-06-18 16:14:21 -04:00
class YoutubeDL ( object ) :
""" YoutubeDL class.
YoutubeDL objects are the ones responsible for downloading the
actual video file and writing it to disk if the user has requested
it , among some other tasks . In most cases there should be one per
program . As , given a video URL , the downloader doesn ' t know how to
extract all the needed information , task that InfoExtractors do , it
has to pass the URL to one of them .
For this , YoutubeDL objects have a method that allows
InfoExtractors to be registered in a given order . When it is passed
a URL , the YoutubeDL object hands it to the first InfoExtractor it
finds that reports being able to handle it . The InfoExtractor extracts
all the information about the video or videos the URL refers to , and
YoutubeDL processes the extracted information , possibly using a File
Downloader to download the video .
YoutubeDL objects accept a lot of parameters . In order not to saturate
the object constructor with arguments , it receives a dictionary of
options instead . These options are available through the params
attribute for the InfoExtractors to use . The YoutubeDL also
registers itself as the downloader in charge for the InfoExtractors
that are added to it , so this is a " mutual registration " .
Available options :
username : Username for authentication purposes .
password : Password for authentication purposes .
2015-06-14 01:49:42 -04:00
videopassword : Password for accessing a video .
2016-09-15 11:24:55 -04:00
ap_mso : Adobe Pass multiple - system operator identifier .
ap_username : Multiple - system operator account username .
ap_password : Multiple - system operator account password .
2013-06-18 16:14:21 -04:00
usenetrc : Use netrc for authentication instead .
verbose : Print additional info to stdout .
quiet : Do not print messages to stdout .
2014-03-25 19:43:46 -04:00
no_warnings : Do not print out anything for warnings .
2021-05-14 03:44:38 -04:00
forceprint : A list of templates to force print
forceurl : Force printing final URL . ( Deprecated )
forcetitle : Force printing title . ( Deprecated )
forceid : Force printing ID . ( Deprecated )
forcethumbnail : Force printing thumbnail URL . ( Deprecated )
forcedescription : Force printing description . ( Deprecated )
forcefilename : Force printing final filename . ( Deprecated )
forceduration : Force printing duration . ( Deprecated )
2013-11-20 00:18:24 -05:00
forcejson : Force printing info_dict as JSON .
2014-10-24 18:30:57 -04:00
dump_single_json : Force printing the info_dict of the whole playlist
( or video ) as a single JSON line .
2021-02-04 17:53:04 -05:00
force_write_download_archive : Force writing download archive regardless
of ' skip_download ' or ' simulate ' .
2021-08-06 20:01:51 -04:00
simulate : Do not download the video files . If unset ( or None ) ,
simulate only if listsubtitles , listformats or list_thumbnails is used
Better Format Sorting (Squashed)
* Added --format-sort (-S height,filesize)
* Made fields reversible (-S +height)
* Added --format-sort-force, --no-format-sort-force
* Added limit (-S height:720)
* Added codec preference (-S vcodec,acodec)
* Correct handling of preference<-1000
* Rebased to yt-dlc
* Automatically determine missing bitrates
* aext, vext, protocol, acodec, vcodec can now takes priority as string, not number (-S vext:webm)
* Correct handling of None in codec, audio_codec (None means the codec is unknown while 'none' means it doesn't exist)
* Correctly parse filesize (-S filesize:200M)
* Generalized preference calculation
* Rewrote entire code into the class FormatSort
* Correctly handle user input errors
* Combined fields (-S +ext:webm:webm)
* Closest mode (-S filesize~50M)
* Aliases (framerate=fps, br=bitrate etc)
* Documentation
2020-10-26 11:50:09 -04:00
format : Video format code . see " FORMAT SELECTION " for more details .
2021-02-11 22:51:59 -05:00
allow_unplayable_formats : Allow unplayable formats to be extracted and downloaded .
2021-04-16 20:09:58 -04:00
ignore_no_formats_error : Ignore " No video formats " error . Useful for
extracting metadata even if the video is not actually
available for download ( experimental )
2021-02-04 17:53:04 -05:00
format_sort : How to sort the video formats . see " Sorting Formats "
for more details .
format_sort_force : Force the given format_sort . see " Sorting Formats "
for more details .
allow_multiple_video_streams : Allow multiple video streams to be merged
into a single file
allow_multiple_audio_streams : Allow multiple audio streams to be merged
into a single file
2021-07-15 13:19:59 -04:00
check_formats : Whether to test if the formats are downloadable .
Can be True ( check all ) , False ( check none )
or None ( check only if requested by extractor )
2021-02-19 16:33:17 -05:00
paths : Dictionary of output paths . The allowed keys are ' home '
' temp ' and the keys of OUTTMPL_TYPES ( in utils . py )
2021-02-03 08:36:09 -05:00
outtmpl : Dictionary of templates for output names . Allowed keys
2021-02-19 16:33:17 -05:00
are ' default ' and the keys of OUTTMPL_TYPES ( in utils . py ) .
2021-08-07 04:49:17 -04:00
For compatibility with youtube - dl , a single string can also be used
2021-01-16 12:12:05 -05:00
outtmpl_na_placeholder : Placeholder for unavailable meta fields .
restrictfilenames : Do not allow " & " and spaces in file names
trim_file_name : Limit length of filename ( extension excluded )
2021-02-19 16:33:17 -05:00
windowsfilenames : Force the filenames to be windows compatible
2021-01-16 12:12:05 -05:00
ignoreerrors : Do not stop on download errors
2021-02-24 13:45:56 -05:00
( Default True when running yt - dlp ,
2021-01-16 12:12:05 -05:00
but False when directly accessing YoutubeDL class )
2021-04-21 02:00:43 -04:00
skip_playlist_after_errors : Number of allowed failures until the rest of
the playlist is skipped
2015-06-12 09:20:12 -04:00
force_generic_extractor : Force downloader to use the generic extractor
2019-10-13 12:00:48 -04:00
overwrites : Overwrite all video and metadata files if True ,
overwrite only non - video files if None
and don ' t overwrite any file if False
2021-08-07 04:49:17 -04:00
For compatibility with youtube - dl ,
" nooverwrites " may also be used instead
2013-06-18 16:14:21 -04:00
playliststart : Playlist item to start at .
playlistend : Playlist item to end at .
2015-01-24 22:24:55 -05:00
playlist_items : Specific indices of playlist to download .
2014-07-10 23:11:11 -04:00
playlistreverse : Download playlist items in reverse order .
2017-01-31 04:03:31 -05:00
playlistrandom : Download playlist items in random order .
2013-06-18 16:14:21 -04:00
matchtitle : Download only matching titles .
rejecttitle : Reject downloads for matching titles .
2013-11-24 00:08:11 -05:00
logger : Log messages to a logging . Logger instance .
2013-06-18 16:14:21 -04:00
logtostderr : Log messages to stderr instead of stdout .
writedescription : Write the video description to a . description file
writeinfojson : Write the video description to a . info . json file
2021-03-18 11:27:20 -04:00
clean_infojson : Remove private fields from the infojson
2021-08-07 04:49:17 -04:00
getcomments : Extract video comments . This will not be written to disk
2021-01-27 10:02:51 -05:00
unless writeinfojson is also given
2013-10-14 01:18:58 -04:00
writeannotations : Write the video annotations to a . annotations . xml file
2013-06-18 16:14:21 -04:00
writethumbnail : Write the thumbnail image to a file
2021-02-04 17:53:04 -05:00
allow_playlist_files : Whether to write playlists ' description, infojson etc
also to disk when using the ' write* ' options
2015-01-24 21:11:12 -05:00
write_all_thumbnails : Write all thumbnail formats to files
2020-10-27 06:37:21 -04:00
writelink : Write an internet shortcut file , depending on the
current platform ( . url / . webloc / . desktop )
writeurllink : Write a Windows internet shortcut file ( . url )
writewebloclink : Write a macOS internet shortcut file ( . webloc )
writedesktoplink : Write a Linux internet shortcut file ( . desktop )
2013-06-18 16:14:21 -04:00
writesubtitles : Write the video subtitles to a file
2015-11-16 09:15:25 -05:00
writeautomaticsub : Write the automatically generated subtitles to a file
2021-07-21 14:02:21 -04:00
allsubtitles : Deprecated - Use subtitleslangs = [ ' all ' ]
2021-04-19 17:17:09 -04:00
Downloads all the subtitles of the video
2013-09-14 05:14:40 -04:00
( requires writesubtitles or writeautomaticsub )
2013-06-18 16:14:21 -04:00
listsubtitles : Lists all available subtitles for the video
2015-02-15 12:03:41 -05:00
subtitlesformat : The format code for subtitles
2021-04-19 17:17:09 -04:00
subtitleslangs : List of languages of the subtitles to download ( can be regex ) .
The list may contain " all " to refer to all the available
subtitles . The language can be prefixed with a " - " to
exclude it from the requested languages . Eg : [ ' all ' , ' -live_chat ' ]
2013-06-18 16:14:21 -04:00
keepvideo : Keep the video file after post - processing
daterange : A DateRange object , download only if the upload_date is in the range .
skip_download : Skip the actual download of the video file
2013-09-22 05:09:25 -04:00
cachedir : Location of the cache files in the filesystem .
2014-09-03 06:41:05 -04:00
False to disable filesystem cache .
2013-09-30 16:26:25 -04:00
noplaylist : Download single video instead of a playlist if in doubt .
2013-10-06 00:06:30 -04:00
age_limit : An integer representing the user ' s age in years.
Unsuitable videos for the given age are skipped .
2013-12-15 21:09:49 -05:00
min_views : An integer representing the minimum view count the video
must have in order to not be skipped .
Videos without view count information are always
downloaded . None for no limit .
max_views : An integer representing the maximum view count .
Videos that are more popular than that are not
downloaded .
Videos without view count information are always
downloaded . None for no limit .
download_archive : File name of a file where all downloads are recorded .
2013-10-05 22:27:09 -04:00
Videos already present in the file are not downloaded
again .
2021-01-18 14:17:48 -05:00
break_on_existing : Stop the download process after attempting to download a
file that is in the archive .
break_on_reject : Stop the download process when encountering a video that
has been filtered out .
cookiefile : File name where cookies should be read from and dumped to
2021-07-21 16:32:49 -04:00
cookiesfrombrowser : A tuple containing the name of the browser and the profile
name / path from where cookies are loaded .
Eg : ( ' chrome ' , ) or ( ' vivaldi ' , ' default ' )
2013-11-24 09:03:25 -05:00
nocheckcertificate : Do not verify SSL certificates
2014-03-20 19:33:53 -04:00
prefer_insecure : Use HTTP instead of HTTPS to retrieve information .
At the moment , this is only supported by YouTube .
2013-11-24 09:03:25 -05:00
proxy : URL of the proxy server to use
2016-07-03 11:23:48 -04:00
geo_verification_proxy : URL of the proxy to use for IP address verification
2018-05-19 12:53:24 -04:00
on geo - restricted sites .
2013-12-01 05:42:02 -05:00
socket_timeout : Time to wait for unresponsive hosts , in seconds
2013-12-08 22:08:51 -05:00
bidi_workaround : Work around buggy terminals without bidirectional text
support , using fribidi
2013-12-29 09:28:32 -05:00
debug_printtraffic : Print out sent and received HTTP traffic
2014-01-20 20:09:49 -05:00
include_ads : Download ads as well
2014-01-22 08:16:43 -05:00
default_search : Prepend this string if an input url is not valid .
' auto ' for elaborate guessing
2014-03-30 00:02:41 -04:00
encoding : Use this encoding instead of the system - specified .
2014-08-21 05:52:07 -04:00
extract_flat : Do not resolve URLs , return the immediate result .
2014-10-24 08:48:12 -04:00
Pass in ' in_playlist ' to only show this behavior for
playlist items .
2014-12-14 19:06:25 -05:00
postprocessors : A list of dictionaries , each with an entry
2014-12-14 19:26:18 -05:00
* key : The name of the postprocessor . See
2021-02-24 13:45:56 -05:00
yt_dlp / postprocessor / __init__ . py for a list .
2021-04-10 18:18:07 -04:00
* when : When to run the postprocessor . Can be one of
pre_process | before_dl | post_process | after_move .
Assumed to be ' post_process ' if not given
2020-12-29 10:03:07 -05:00
post_hooks : A list of functions that get called as the final step
for each video file , after all postprocessors have been
called . The filename will be passed as the only argument .
2014-12-14 19:26:18 -05:00
progress_hooks : A list of functions that get called on download
progress , with a dictionary with the entries
2015-02-17 15:37:48 -05:00
* status : One of " downloading " , " error " , or " finished " .
2015-01-25 00:15:51 -05:00
Check this first and ignore unknown values .
2021-07-21 13:28:43 -04:00
* info_dict : The extracted info_dict
2014-12-14 19:26:18 -05:00
2015-02-17 15:37:48 -05:00
If status is one of " downloading " , or " finished " , the
2015-01-25 00:15:51 -05:00
following properties may also be present :
* filename : The final filename ( always present )
2015-02-17 15:37:48 -05:00
* tmpfilename : The filename we ' re currently writing to
2014-12-14 19:26:18 -05:00
* downloaded_bytes : Bytes on disk
* total_bytes : Size of the whole file , None if unknown
2015-02-17 15:37:48 -05:00
* total_bytes_estimate : Guess of the eventual file size ,
None if unavailable .
* elapsed : The number of seconds since download started .
2014-12-14 19:26:18 -05:00
* eta : The estimated time in seconds , None if unknown
* speed : The download speed in bytes / second , None if
unknown
2015-02-17 15:37:48 -05:00
* fragment_index : The counter of the currently
downloaded video fragment .
* fragment_count : The number of fragments ( = individual
files that will be merged )
2014-12-14 19:26:18 -05:00
Progress hooks are guaranteed to be called at least once
( with status " finished " ) if the download is successful .
2015-01-09 19:59:14 -05:00
merge_output_format : Extension to use when merging formats .
2021-01-28 00:18:36 -05:00
final_ext : Expected final extension ; used to detect when the file was
already downloaded and converted . " merge_output_format " is
replaced by this extension when given
2015-01-09 23:45:51 -05:00
fixup : Automatically correct known faults of the file .
One of :
- " never " : do nothing
- " warn " : only emit a warning
- " detect_or_warn " : check whether we can do anything
2015-01-23 12:39:12 -05:00
about it , warn otherwise ( default )
2018-05-19 12:53:24 -04:00
source_address : Client - side IP address to bind to .
2016-01-10 13:27:22 -05:00
call_home : Boolean , true iff we are allowed to contact the
2021-02-24 13:45:56 -05:00
yt - dlp servers for debugging . ( BROKEN )
2021-02-27 07:41:23 -05:00
sleep_interval_requests : Number of seconds to sleep between requests
during extraction
2016-08-08 16:46:52 -04:00
sleep_interval : Number of seconds to sleep before each download when
used alone or a lower bound of a range for randomized
sleep before each download ( minimum possible number
of seconds to sleep ) when used along with
max_sleep_interval .
max_sleep_interval : Upper bound of a range for randomized sleep before each
download ( maximum possible number of seconds to sleep ) .
Must only be used along with sleep_interval .
Actual sleep time will be a random float from range
[ sleep_interval ; max_sleep_interval ] .
2021-02-27 07:41:23 -05:00
sleep_interval_subtitles : Number of seconds to sleep before each subtitle download
2015-01-24 20:38:47 -05:00
listformats : Print an overview of available video formats and exit .
list_thumbnails : Print a table of all thumbnails and exit .
2015-02-09 21:32:21 -05:00
match_filter : A function that gets called with the info_dict of
every video .
If it returns a message , the video is ignored .
If it returns None , the video is downloaded .
match_filter_func in utils . py is one example for this .
2015-02-09 22:22:10 -05:00
no_color : Do not emit color codes in output .
2017-02-18 13:53:41 -05:00
geo_bypass : Bypass geographic restriction via faking X - Forwarded - For
2018-05-19 12:53:24 -04:00
HTTP header
2017-02-18 13:53:41 -05:00
geo_bypass_country :
2017-02-04 06:49:58 -05:00
Two - letter ISO 3166 - 2 country code that will be used for
explicit geographic restriction bypassing via faking
2018-05-19 12:53:24 -04:00
X - Forwarded - For HTTP header
2018-05-01 20:18:01 -04:00
geo_bypass_ip_block :
IP range in CIDR notation that will be used similarly to
2018-05-19 12:53:24 -04:00
geo_bypass_country
2014-12-14 19:26:18 -05:00
2015-02-17 06:09:12 -05:00
The following options determine which downloader is picked :
2021-04-10 11:08:33 -04:00
external_downloader : A dictionary of protocol keys and the executable of the
external downloader to use for it . The allowed protocols
are default | http | ftp | m3u8 | dash | rtsp | rtmp | mms .
Set the value to ' native ' to use the native downloader
hls_prefer_native : Deprecated - Use external_downloader = { ' m3u8 ' : ' native ' }
or { ' m3u8 ' : ' ffmpeg ' } instead .
Use the native HLS downloader instead of ffmpeg / avconv
2016-04-21 13:02:17 -04:00
if True , otherwise use ffmpeg / avconv if False , otherwise
use downloader suggested by extractor if None .
2021-05-11 04:00:48 -04:00
compat_opts : Compatibility options . See " Differences in default behavior " .
2021-07-06 17:21:29 -04:00
The following options do not work when used through the API :
filename , abort - on - error , multistreams , no - live - chat ,
2021-08-05 14:00:53 -04:00
no - clean - infojson , no - playlist - metafiles , no - keep - subs .
2021-07-31 02:08:39 -04:00
Refer to __init__ . py for their implementation
2013-10-22 08:49:34 -04:00
2013-06-18 16:14:21 -04:00
The following parameters are not used by YoutubeDL itself , they are used by
2021-02-24 13:45:56 -05:00
the downloader ( see yt_dlp / downloader / common . py ) :
2021-06-22 19:11:09 -04:00
nopart , updatetime , buffersize , ratelimit , throttledratelimit , min_filesize ,
max_filesize , test , noresizebuffer , retries , continuedl , noprogress , consoletitle ,
xattr_set_filesize , external_downloader_args , hls_use_mpegts , http_chunk_size .
2014-01-08 11:53:34 -05:00
The following options are used by the post processors :
2018-06-28 14:09:14 -04:00
prefer_ffmpeg : If False , use avconv instead of ffmpeg if both are available ,
2021-01-26 12:57:32 -05:00
otherwise prefer ffmpeg . ( avconv support is deprecated )
2019-04-01 14:29:44 -04:00
ffmpeg_location : Location of the ffmpeg / avconv binary ; either the path
to the binary or its containing directory .
2021-01-20 11:07:40 -05:00
postprocessor_args : A dictionary of postprocessor / executable keys ( in lower case )
2021-08-07 04:49:17 -04:00
and a list of additional command - line arguments for the
postprocessor / executable . The dict can also have " PP+EXE " keys
which are used when the given exe is used by the given PP .
Use ' default ' as the name for arguments to be passed to all PP
For compatibility with youtube - dl , a single list of args
can also be used
2021-02-24 11:01:01 -05:00
The following options are used by the extractors :
2021-02-28 18:48:37 -05:00
extractor_retries : Number of times to retry for known errors
dynamic_mpd : Whether to process dynamic DASH manifests ( default : True )
2021-02-24 11:01:01 -05:00
hls_split_discontinuity : Split HLS playlists to different formats at
2021-02-28 18:48:37 -05:00
discontinuities such as ad breaks ( default : False )
2021-06-25 10:05:41 -04:00
extractor_args : A dictionary of arguments to be passed to the extractors .
See " EXTRACTOR ARGUMENTS " for details .
Eg : { ' youtube ' : { ' skip ' : [ ' dash ' , ' hls ' ] } }
youtube_include_dash_manifest : Deprecated - Use extractor_args instead .
If True ( default ) , DASH manifests and related
2021-02-28 18:48:37 -05:00
data will be downloaded and processed by extractor .
You can reduce network I / O by disabling it if you don ' t
care about DASH . ( only for youtube )
2021-06-25 10:05:41 -04:00
youtube_include_hls_manifest : Deprecated - Use extractor_args instead .
If True ( default ) , HLS manifests and related
2021-02-28 18:48:37 -05:00
data will be downloaded and processed by extractor .
You can reduce network I / O by disabling it if you don ' t
care about HLS . ( only for youtube )
2013-06-18 16:14:21 -04:00
"""
2017-06-08 11:53:14 -04:00
_NUMERIC_FIELDS = set ( (
' width ' , ' height ' , ' tbr ' , ' abr ' , ' asr ' , ' vbr ' , ' fps ' , ' filesize ' , ' filesize_approx ' ,
' timestamp ' , ' upload_year ' , ' upload_month ' , ' upload_day ' ,
' duration ' , ' view_count ' , ' like_count ' , ' dislike_count ' , ' repost_count ' ,
' average_rating ' , ' comment_count ' , ' age_limit ' ,
' start_time ' , ' end_time ' ,
' chapter_number ' , ' season_number ' , ' episode_number ' ,
' track_number ' , ' disc_number ' , ' release_year ' ,
' playlist_index ' ,
) )
2013-06-18 16:14:21 -04:00
params = None
2021-08-22 19:56:45 -04:00
_ies = { }
2021-04-10 18:18:07 -04:00
_pps = { ' pre_process ' : [ ] , ' before_dl ' : [ ] , ' after_move ' : [ ] , ' post_process ' : [ ] }
2021-07-21 08:36:34 -04:00
_printed_messages = set ( )
2021-02-27 07:41:23 -05:00
_first_webpage_request = True
2013-06-18 16:14:21 -04:00
_download_retcode = None
_num_downloads = None
2021-01-16 07:40:15 -05:00
_playlist_level = 0
_playlist_urls = set ( )
2013-06-18 16:14:21 -04:00
_screen_file = None
2014-10-28 07:54:29 -04:00
def __init__ ( self , params = None , auto_init = True ) :
2013-06-18 16:14:21 -04:00
""" Create a YoutubeDL object with the given options. """
2013-12-31 07:34:52 -05:00
if params is None :
params = { }
2021-08-22 19:56:45 -04:00
self . _ies = { }
2013-07-08 09:14:27 -04:00
self . _ies_instances = { }
2021-04-10 18:18:07 -04:00
self . _pps = { ' pre_process ' : [ ] , ' before_dl ' : [ ] , ' after_move ' : [ ] , ' post_process ' : [ ] }
2021-07-21 08:36:34 -04:00
self . _printed_messages = set ( )
2021-02-27 07:41:23 -05:00
self . _first_webpage_request = True
2020-12-29 10:03:07 -05:00
self . _post_hooks = [ ]
2013-12-23 04:37:27 -05:00
self . _progress_hooks = [ ]
2013-06-18 16:14:21 -04:00
self . _download_retcode = 0
self . _num_downloads = 0
self . _screen_file = [ sys . stdout , sys . stderr ] [ params . get ( ' logtostderr ' , False ) ]
2013-12-08 22:08:51 -05:00
self . _err_file = sys . stderr
2015-09-05 09:17:30 -04:00
self . params = {
# Default parameters
' nocheckcertificate ' : False ,
}
self . params . update ( params )
2014-09-03 06:41:05 -04:00
self . cache = Cache ( self )
2013-09-21 05:48:07 -04:00
2021-05-08 18:24:44 -04:00
if sys . version_info < ( 3 , 6 ) :
self . report_warning (
2021-06-05 15:17:18 -04:00
' Python version %d . %d is not supported! Please update to Python 3.6 or above ' % sys . version_info [ : 2 ] )
2021-05-08 18:24:44 -04:00
2021-08-22 16:08:38 -04:00
if self . params . get ( ' allow_unplayable_formats ' ) :
self . report_warning (
' You have asked for unplayable formats to be listed/downloaded. '
' This is a developer option intended for debugging. '
' If you experience any issues while using this option, DO NOT open a bug report ' )
2017-02-23 18:04:27 -05:00
# Local helper: report use of a deprecated parameter.
# If `param` is present in self.params with a value other than None, emit a
# warning naming the deprecated `option` and the `suggestion` to use instead,
# and return True; otherwise return False without warning.
def check_deprecated ( param , option , suggestion ) :
if self . params . get ( param ) is not None :
2021-05-11 04:00:48 -04:00
self . report_warning ( ' %s is deprecated. Use %s instead ' % ( option , suggestion ) )
2017-02-23 18:04:27 -05:00
return True
return False
if check_deprecated ( ' cn_verification_proxy ' , ' --cn-verification-proxy ' , ' --geo-verification-proxy ' ) :
2016-07-03 11:23:48 -04:00
if self . params . get ( ' geo_verification_proxy ' ) is None :
self . params [ ' geo_verification_proxy ' ] = self . params [ ' cn_verification_proxy ' ]
2021-05-03 05:41:59 -04:00
check_deprecated ( ' autonumber ' , ' --auto-number ' , ' -o " %(autonumber)s - %(title)s . %(ext)s " ' )
check_deprecated ( ' usetitle ' , ' --title ' , ' -o " %(title)s - %(id)s . %(ext)s " ' )
2021-05-11 04:00:48 -04:00
check_deprecated ( ' useid ' , ' --id ' , ' -o " %(id)s . %(ext)s " ' )
2021-05-03 05:41:59 -04:00
for msg in self . params . get ( ' warnings ' , [ ] ) :
self . report_warning ( msg )
2021-08-07 07:20:46 -04:00
if self . params . get ( ' overwrites ' ) is None :
self . params . pop ( ' overwrites ' , None )
elif self . params . get ( ' nooverwrites ' ) is not None :
# nooverwrites was unnecessarily changed to overwrites
# in 0c3d0f51778b153f65c21906031c2e091fcfb641
# This ensures compatibility with both keys
self . params [ ' overwrites ' ] = not self . params [ ' nooverwrites ' ]
else :
self . params [ ' nooverwrites ' ] = not self . params [ ' overwrites ' ]
2021-02-01 10:15:46 -05:00
2013-12-08 22:08:51 -05:00
if params . get ( ' bidi_workaround ' , False ) :
2013-12-09 12:29:07 -05:00
try :
import pty
master , slave = pty . openpty ( )
2015-02-28 15:42:16 -05:00
width = compat_get_terminal_size ( ) . columns
2013-12-09 12:29:07 -05:00
if width is None :
width_args = [ ]
else :
width_args = [ ' -w ' , str ( width ) ]
2013-12-22 22:19:20 -05:00
sp_kwargs = dict (
2013-12-09 12:29:07 -05:00
stdin = subprocess . PIPE ,
stdout = slave ,
stderr = self . _err_file )
2013-12-22 22:19:20 -05:00
try :
self . _output_process = subprocess . Popen (
[ ' bidiv ' ] + width_args , * * sp_kwargs
)
except OSError :
self . _output_process = subprocess . Popen (
[ ' fribidi ' , ' -c ' , ' UTF-8 ' ] + width_args , * * sp_kwargs )
self . _output_channel = os . fdopen ( master , ' rb ' )
2013-12-09 12:29:07 -05:00
except OSError as ose :
2016-05-14 07:41:41 -04:00
if ose . errno == errno . ENOENT :
2014-01-04 19:52:03 -05:00
self . report_warning ( ' Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH. ' )
2013-12-09 12:29:07 -05:00
else :
raise
2013-12-08 22:08:51 -05:00
2019-05-10 16:56:22 -04:00
if ( sys . platform != ' win32 '
and sys . getfilesystemencoding ( ) in [ ' ascii ' , ' ANSI_X3.4-1968 ' ]
and not params . get ( ' restrictfilenames ' , False ) ) :
2017-05-08 14:14:02 -04:00
# Unicode filesystem API will throw errors (#1474, #13027)
2013-09-21 05:48:07 -04:00
self . report_warning (
2014-01-04 19:52:03 -05:00
' Assuming --restrict-filenames since file system encoding '
2014-10-09 11:00:24 -04:00
' cannot encode all characters. '
2014-01-04 19:52:03 -05:00
' Set the LC_ALL environment variable to fix this. ' )
2013-11-26 12:53:36 -05:00
self . params [ ' restrictfilenames ' ] = True
2013-09-21 05:48:07 -04:00
2021-02-03 08:36:09 -05:00
self . outtmpl_dict = self . parse_outtmpl ( )
2015-03-13 03:40:20 -04:00
2021-06-11 09:43:22 -04:00
# Creating format selector here allows us to catch syntax errors before the extraction
self . format_selector = (
None if self . params . get ( ' format ' ) is None
else self . build_format_selector ( self . params [ ' format ' ] ) )
2013-11-22 13:57:52 -05:00
self . _setup_opener ( )
2021-05-03 06:31:20 -04:00
""" Preload the archive, if any is specified """
def preload_download_archive ( fn ) :
if fn is None :
return False
2021-05-14 03:45:29 -04:00
self . write_debug ( ' Loading archive file %r \n ' % fn )
2021-05-03 06:31:20 -04:00
try :
with locked_file ( fn , ' r ' , encoding = ' utf-8 ' ) as archive_file :
for line in archive_file :
self . archive . add ( line . strip ( ) )
except IOError as ioe :
if ioe . errno != errno . ENOENT :
raise
return False
return True
self . archive = set ( )
preload_download_archive ( self . params . get ( ' download_archive ' ) )
2014-10-28 07:54:29 -04:00
if auto_init :
self . print_debug_header ( )
self . add_default_info_extractors ( )
2014-12-14 19:06:25 -05:00
for pp_def_raw in self . params . get ( ' postprocessors ' , [ ] ) :
pp_def = dict ( pp_def_raw )
2021-06-19 18:19:23 -04:00
when = pp_def . pop ( ' when ' , ' post_process ' )
pp_class = get_postprocessor ( pp_def . pop ( ' key ' ) )
2014-12-14 19:06:25 -05:00
pp = pp_class ( self , * * compat_kwargs ( pp_def ) )
2021-01-26 05:20:20 -05:00
self . add_post_processor ( pp , when = when )
2014-12-14 19:06:25 -05:00
2020-12-29 10:03:07 -05:00
for ph in self . params . get ( ' post_hooks ' , [ ] ) :
self . add_post_hook ( ph )
2014-12-14 19:26:18 -05:00
for ph in self . params . get ( ' progress_hooks ' , [ ] ) :
self . add_progress_hook ( ph )
2016-05-03 03:15:32 -04:00
register_socks_protocols ( )
2014-11-23 04:49:19 -05:00
def warn_if_short_id(self, argv):
    """Warn about arguments that look like a YouTube ID starting with a dash.

    An argument consisting of a dash followed by ten ID characters would be
    taken for an option, so the warning prints the command line rewritten
    with every such argument moved behind a ``--`` separator.
    """
    short_id_re = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    suspects = [pos for pos, arg in enumerate(argv) if short_id_re.match(arg)]
    if not suspects:
        return
    regular_args = [arg for pos, arg in enumerate(argv) if pos not in suspects]
    moved_args = [argv[pos] for pos in suspects]
    correct_argv = ['yt-dlp'] + regular_args + ['--'] + moved_args
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
2013-06-18 16:14:21 -04:00
def add_info_extractor(self, ie):
    """Register an InfoExtractor (class or instance) under its ie_key.

    Every extractor is stored in ``_ies``. An instance (anything that is
    not a class) is additionally stored in ``_ies_instances`` and bound to
    this downloader through ``set_downloader``.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        # A bare class has no instance to cache or bind yet
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)
2013-06-18 16:14:21 -04:00
2021-08-22 19:56:45 -04:00
def _get_info_extractor_class(self, ie_key):
    """Return the extractor registered under ie_key, registering it on a miss.

    On a miss the extractor is obtained from the module-level
    ``get_info_extractor`` and added through ``add_info_extractor``.
    """
    registered = self._ies.get(ie_key)
    if registered is not None:
        return registered
    ie_class = get_info_extractor(ie_key)
    self.add_info_extractor(ie_class)
    return ie_class
2013-07-08 09:14:27 -04:00
def get_info_extractor(self, ie_key):
    """
    Get an instance of the IE named ie_key. A cached instance from
    _ies_instances is returned when there is one; otherwise a new instance
    is created, registered with add_info_extractor and returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    # The module-level get_info_extractor yields the class; instantiate it
    instance = get_info_extractor(ie_key)()
    self.add_info_extractor(instance)
    return instance
2013-06-27 17:51:06 -04:00
def add_default_info_extractors(self):
    """
    Register every InfoExtractor yielded by gen_extractor_classes, in the
    order they are yielded.
    """
    register = self.add_info_extractor
    for ie_class in gen_extractor_classes():
        register(ie_class)
2021-04-10 18:18:07 -04:00
def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain stored under `when` and bind it to this downloader."""
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)
2020-12-29 10:03:07 -05:00
def add_post_hook(self, ph):
    """Append ph to the list of post hooks."""
    hooks = self._post_hooks
    hooks.append(ph)
2013-12-23 04:37:27 -05:00
def add_progress_hook(self, ph):
    """Append ph to the list of progress hooks (currently only for the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
2013-09-23 12:09:28 -04:00
2013-12-09 12:29:07 -05:00
def _bidi_workaround(self, message):
    """Pass message through the external bidi process, if one was set up.

    Without an ``_output_channel`` attribute the message is returned
    unchanged. Otherwise the message plus a newline is written UTF-8
    encoded to the process' stdin, and one line per line of the message is
    read back from the output channel; the final newline is dropped.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)

    expected_lines = 1 + message.count('\n')
    process_input = self._output_process.stdin
    process_input.write((message + '\n').encode('utf-8'))
    process_input.flush()

    converted = ''.join(
        self._output_channel.readline().decode('utf-8')
        for _ in range(expected_lines))
    return converted[:-len('\n')]
2013-12-09 12:29:07 -05:00
2021-07-21 08:36:34 -04:00
def _write_string(self, message, out=None, only_once=False):
    """Write message to `out` using the 'encoding' param.

    With only_once set, a message already present in ``_printed_messages``
    is dropped; otherwise it is recorded there before being written.
    """
    if only_once and message in self._printed_messages:
        return
    if only_once:
        self._printed_messages.add(message)
    write_string(message, out=out, encoding=self.params.get('encoding'))
2014-04-07 13:57:42 -04:00
2021-05-04 11:39:36 -04:00
def to_stdout(self, message, skip_eol=False, quiet=False):
    """Print message to stdout

    A configured 'logger' receives the message through its debug() method
    instead. Without a logger, a quiet message is only written when the
    'verbose' param is set, and then to the error stream rather than the
    screen stream. skip_eol suppresses the trailing newline.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if quiet and not self.params.get('verbose'):
        return
    text = '%s%s' % (self._bidi_workaround(message), '' if skip_eol else '\n')
    self._write_string(text, self._err_file if quiet else self._screen_file)
2013-06-18 16:14:21 -04:00
2021-07-21 08:36:34 -04:00
def to_stderr(self, message, only_once=False):
    """Print message to stderr

    A configured 'logger' receives the message through its error() method
    instead; only_once is not applied on that path.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = '%s\n' % self._bidi_workaround(message)
    self._write_string(line, self._err_file, only_once=only_once)
2013-06-18 16:14:21 -04:00
2013-11-17 05:39:52 -05:00
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' param is enabled."""
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name != 'nt':
        if 'TERM' in os.environ:
            # Terminal escape sequence carrying the new title
            self._write_string('\033]0;%s\007' % message, self._screen_file)
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
2013-11-17 05:39:52 -05:00
2013-11-17 15:05:14 -05:00
def save_console_title(self):
    """Push the current console title onto the terminal's title stack.

    Nothing is written unless 'consoletitle' is enabled, 'simulate' is off,
    the OS is not 'nt' and TERM is present in the environment.
    """
    options = self.params
    if not options.get('consoletitle', False) or options.get('simulate'):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)
2013-11-17 15:05:14 -05:00
def restore_console_title(self):
    """Pop the console title back from the terminal's title stack.

    Nothing is written unless 'consoletitle' is enabled, 'simulate' is off,
    the OS is not 'nt' and TERM is present in the environment.
    """
    options = self.params
    if not options.get('consoletitle', False) or options.get('simulate'):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)
2013-11-17 15:05:14 -05:00
def __enter__(self):
    """Enter the context: save the console title and hand back this object."""
    self.save_console_title()
    return self
def __exit__(self, *args):
    """Leave the context: restore the console title and persist cookies.

    The cookie jar is saved (keeping discardable and expired cookies) only
    when the 'cookiefile' param is set. Exception details passed in `args`
    are ignored and nothing is returned, so exceptions are not suppressed.
    """
    self.restore_console_title()

    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save(ignore_discard=True, ignore_expires=True)
2013-11-17 15:05:14 -05:00
2013-06-18 16:14:21 -04:00
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message, if any, is printed to stderr first. In verbose mode a
    traceback is printed as well: `tb` when given, otherwise one built from
    the exception being handled (including the exc_info it wraps, if any),
    or from the current call stack when no exception is active.

    Unless the 'ignoreerrors' param is set, a DownloadError carrying the
    message and the relevant exc_info is raised. Otherwise the download
    return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                handled = sys.exc_info()[1]
                wrapped_tb = ''
                if hasattr(handled, 'exc_info') and handled.exc_info[0]:
                    wrapped_tb = ''.join(traceback.format_exception(*handled.exc_info))
                tb = wrapped_tb + encode_compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    handled = sys.exc_info()[1]
    if sys.exc_info()[0] and hasattr(handled, 'exc_info') and handled.exc_info[0]:
        # Prefer the original exc_info carried by a wrapping exception
        exc_info = handled.exc_info
    else:
        exc_info = sys.exc_info()
    raise DownloadError(message, exc_info)
2021-05-14 03:45:29 -04:00
def to_screen(self, message, skip_eol=False):
    """Print message via to_stdout, marking it quiet according to the 'quiet' param."""
    quiet_mode = self.params.get('quiet', False)
    self.to_stdout(message, skip_eol, quiet=quiet_mode)
2021-07-20 16:05:35 -04:00
def report_warning(self, message, only_once=False):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored

    A configured 'logger' receives the bare message through its warning()
    method instead. Without a logger, the 'no_warnings' param suppresses
    the message entirely.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    colored = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    header = '\033[0;33mWARNING:\033[0m' if colored else 'WARNING:'
    self.to_stderr('%s %s' % (header, message), only_once)
2013-06-18 16:14:21 -04:00
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    '''
    colored = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    header = '\033[0;31mERROR:\033[0m' if colored else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
2021-07-21 08:36:34 -04:00
def write_debug(self, message, only_once=False):
    '''Log debug message or Print message to stderr

    Does nothing unless the 'verbose' param is set. The message is prefixed
    with '[debug] ' and goes to the configured 'logger' (debug level) when
    there is one, otherwise to stderr.
    '''
    if not self.params.get('verbose', False):
        return
    debug_line = '[debug] %s' % message
    logger = self.params.get('logger')
    if logger:
        logger.debug(debug_line)
        return
    self.to_stderr(debug_line, only_once)
2021-05-14 03:45:29 -04:00
2013-06-18 16:14:21 -04:00
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    Falls back to a message without the file name if printing the name
    raises UnicodeEncodeError.
    """
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
2013-06-18 16:14:21 -04:00
2019-10-13 12:00:48 -04:00
def report_file_delete(self, file_name):
    """Report that existing file will be deleted.

    Falls back to a message without the file name if printing the name
    raises UnicodeEncodeError.
    """
    try:
        notice = 'Deleting existing file %s' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('Deleting existing file')
2019-10-13 12:00:48 -04:00
2021-08-18 21:49:23 -04:00
def raise_no_formats(self, info, forced=False):
    """Raise or warn about a video for which no formats are available.

    The message depends on whether info carries a truthy '__has_drm'.
    When the 'ignore_no_formats_error' param is set and `forced` is not,
    only a warning is reported. Otherwise an ExtractorError is raised,
    flagged as expected if the video has DRM or the param is set.
    """
    has_drm = info.get('__has_drm')
    if has_drm:
        msg = 'This video is DRM protected'
    else:
        msg = 'No video formats found!'

    expected = self.params.get('ignore_no_formats_error')
    if expected and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(
        msg, video_id=info['id'], ie=info['extractor'],
        expected=has_drm or expected)
2021-02-03 08:36:09 -05:00
def parse_outtmpl(self):
    """Return the output templates as a dict keyed by template type.

    A non-dict 'outtmpl' param is treated as the 'default' template. Every
    type from DEFAULT_OUTTMPL that is missing or falsy is filled in with
    its default. When the param already is a dict it is updated in place.
    A warning is reported for each template given as bytes.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}

    for tmpl_type, fallback in DEFAULT_OUTTMPL.items():
        if not templates.get(tmpl_type):
            templates[tmpl_type] = fallback

    for template in templates.values():
        if not isinstance(template, bytes):
            continue
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
    return templates
2021-06-12 11:18:06 -04:00
def get_output_path(self, dir_type='', filename=None):
    """Build the output path for filename from the 'paths' param.

    The path joins the expanded 'home' entry, the expanded entry for
    dir_type (when dir_type is given) and the filename, and is passed
    through sanitize_path before being returned.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)

    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    path = os.path.join(home_dir, type_dir, filename or '')

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    if sys.version_info < (3, 0) and sys.platform == 'win32':
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
2021-06-08 10:41:00 -04:00
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run expand_path over outtmpl while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack: a random 32-letter separator is
    # wedged between the doubled characters and removed again afterwards.
    sep = ''.join(random.choice(ascii_letters) for _ in range(32))
    protected = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    return expand_path(protected).replace(sep, '')
@staticmethod
def escape_outtmpl(outtmpl):
    '''Escape any remaining strings like %s, %abc% etc.

    Each match of the format pattern that has no 'has_key' group gets an
    extra '%' in front of it; matches with a key are left untouched.
    '''
    def _escape(mobj):
        matched = mobj.group(0)
        return matched if mobj.group('has_key') else '%' + matched

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, _escape, outtmpl)
@classmethod
def validate_outtmpl(cls, outtmpl):
    '''Check that outtmpl can be used as an output template.

    @return None or Exception object (the ValueError raised by a trial
    substitution)
    '''
    def _as_str_conversion(mobj):
        # The l/j/q/B conversions are not known to %-formatting;
        # stand in 's' for the trial substitution
        return f'{mobj.group(0)[:-1]}s'

    expanded = cls._outtmpl_expandpath(outtmpl)
    probe = re.sub(
        STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqB]'), _as_str_conversion, expanded)
    try:
        cls.escape_outtmpl(probe) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None
2021-03-24 18:02:15 -04:00
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """Make the template and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

    Every %(...)X field of outtmpl is evaluated against info_dict (object
    traversal, negation, +/- maths, strftime formatting, alternates and
    defaults) and replaced by a reference to a generated key.

    @param outtmpl    The output template
    @param info_dict  The info dict. 'epoch' is set on it if missing;
                      everything else is done on a shallow copy
    @param sanitize   Optional callable (field_name, value) applied to
                      values that end up as 'c', 's' or 'r' conversions
    @return           (rewritten template, dict of generated key -> value)
    """
    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = dict(info_dict)  # Do not sanitize so as not to consume LazyList
    for key in ('__original_infodict', '__postprocessors'):
        info_dict.pop(key, None)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqB]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}{math_field})*)
        (?:>(?P<strf_format>.+?))?
        (?P<alternate>(?<!\\),[^|)]+)?
        (?:\|(?P<default>.*?))?
        $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    def _traverse_infodict(k):
        # A leading '.' yields an empty first component, which is dropped
        k = k.split('.')
        if k[0] == '':
            k.pop(0)
        return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            math_func = None
            # The maths string alternates operator, operand, operator, ...
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if math_func else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if math_func is None:
                    math_func = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    # Not a literal number: treat the operand as a field
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = math_func(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                math_func = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        return value

    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            return f'%{outer_mobj.group(0)}'
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
        value, default = None, na
        # Walk the chain of comma-separated alternates until one has a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
            else:
                break

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and key in field_size_compat_map:
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value

        str_fmt = f'{fmt[:-1]}s'
        if fmt[-1] == 'l':
            value, fmt = ', '.join(variadic(value)), str_fmt
        elif fmt[-1] == 'j':
            value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
        elif fmt[-1] == 'q':
            value, fmt = compat_shlex_quote(str(value)), str_fmt
        elif fmt[-1] == 'B':
            value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'c':
            # %c needs exactly one character: use the first one, and fall
            # back to a string conversion when the value is empty
            value = str(value)
            if value:
                value = value[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitize(initial_field, value)

        # Build a key unique to this field/format pair; '%' is followed by
        # a NUL so that the key itself is never mistaken for a format
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
2021-03-24 18:02:15 -04:00
2021-02-03 08:36:09 -05:00
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Evaluate the output template of type tmpl_type against info_dict.

    Falls back to the 'default' template when there is none for
    tmpl_type. If OUTTMPL_TYPES defines an extension for tmpl_type, the
    extension of the result is replaced with it. With the 'trim_file_name'
    param set, the part before the last two dot-separated extensions is
    cut to that many characters.

    @return  The filename, or None if the template raised a ValueError
             (which is reported through report_error)
    """
    try:
        def sanitize(k, v):
            return sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))

        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
        outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
        filename = outtmpl % template_dict

        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if force_ext is not None:
            filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Split off at most two extensions so that dots inside the name
            # itself survive, and a name without any dot is not duplicated
            stem, *extensions = filename.rsplit('.', 2)
            filename = '.'.join(filter(None, [stem[:trim_file_name], *extensions]))

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
2021-02-03 08:36:09 -05:00
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    The name comes from the template for dir_type (or 'default'). '-' and
    empty results are returned as they are; anything else is placed
    through get_output_path. With warn set and the 'paths' param in use, a
    one-time warning is reported when the paths cannot apply.
    """
    filename = self._prepare_filename(info_dict, dir_type or 'default')

    if warn and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)

    if not filename or filename == '-':
        return filename
    return self.get_output_path(dir_type, filename)
2021-01-23 07:18:12 -05:00
2021-05-28 12:38:01 -04:00
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """Returns None if the file should be downloaded, otherwise the reason why not

    An entry found in the download archive is rejected first; otherwise the
    title, upload date, view count, age limit and 'match_filter' params are
    checked in that order. Unless silent, the reason is printed. If the
    matching break option ('break_on_existing' or 'break_on_reject') is
    set, the corresponding exception is raised instead of returning.
    """
    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    options = self.params

    def check_filter():
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = options.get('matchtitle', False)
            if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = options.get('rejecttitle', False)
            if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = options.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)

        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = options.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = options.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

        if age_restricted(info_dict.get('age_limit'), options.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = options.get('match_filter')
        if match_filter is None:
            return None
        try:
            return match_filter(info_dict, incomplete=incomplete)
        except TypeError:
            # For backward compatibility
            return None if incomplete else match_filter(info_dict)

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return None
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason
2013-10-22 08:49:34 -04:00
2013-11-03 05:56:45 -05:00
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing

    Keys already present in info_dict keep their value; info_dict is
    modified in place.
    '''
    for field, fallback in extra_info.items():
        if field not in info_dict:
            info_dict[field] = fallback
2021-09-03 13:18:42 -04:00
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract information for url with the first suitable extractor.

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
               must be True for download to work.
    force_generic_extractor -- force using the generic extractor

    Returns whatever the internal extraction step returns for the chosen
    extractor, or None when the URL's temporary id is already in the
    download archive or when no extractor is suitable (an error is
    reported in the latter case).
    """
    extra_info = {} if extra_info is None else extra_info

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    # With a key hint only that extractor is tried; otherwise every known one
    if ie_key:
        candidates = {ie_key: self._get_info_extractor_class(ie_key)}
    else:
        candidates = self._ies

    for key, extractor in candidates.items():
        if not extractor.suitable(url):
            continue

        if not extractor.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = extractor.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                key, temp_id))
            # Already archived: stop without extracting and without an error
            return None

        # Only the first suitable extractor is ever used
        return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)

    self.report_error('no suitable InfoExtractor for URL %s' % url)
2021-09-03 17:37:27 -04:00
def __handle_extraction_exceptions(func):
    """Decorator that turns extraction errors of a method into reported errors.

    The wrapped callable is invoked as func(self, *args, **kwargs).
    - GeoRestrictedError and ExtractorError are reported via
      self.report_error and the wrapper returns None.
    - ThrottledDownload triggers a warning and the call is retried.
    - MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached and
      LazyList.IndexError always propagate.
    - Any other exception is reported (and None returned) when the
      'ignoreerrors' param is set, otherwise it propagates.
    """
    import functools

    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        # Retry in a loop rather than by recursing from inside the handler,
        # so repeated throttling cannot pile up stack frames
        while True:
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except ThrottledDownload:
                self.to_stderr('\r')
                self.report_warning('The download speed is below throttle limit. Re-extracting data')
                continue
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
            # An error was reported instead of raised: nothing to return
            return None
    return wrapper
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor ie on url and optionally process its result.

    Returns None when the extractor returns None, the raw (normalized)
    result when process is false, and otherwise the value of
    process_ie_result.
    """
    result = ie.extract(url)
    if result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return None

    if isinstance(result, list):
        # Backwards compatibility: old IE result format
        result = dict(_type='compat_list', entries=result)

    original_url = extra_info.get('original_url')
    if original_url:
        result.setdefault('original_url', original_url)

    self.add_default_extra_info(result, ie, url)
    if not process:
        return result
    return self.process_ie_result(result, download, extra_info)
2013-10-22 08:49:34 -04:00
2014-03-23 11:06:03 -04:00
def add_default_extra_info(self, ie_result, ie, url):
    """Fill URL- and extractor-derived defaults into ie_result.

    Values are added through add_extra_info, so keys that ie_result
    already has are kept. Either url or ie may be None, in which case the
    corresponding group of fields is skipped.
    """
    if url is not None:
        url_fields = {
            'webpage_url': url,
            'original_url': url,
            'webpage_url_basename': url_basename(url),
        }
        self.add_extra_info(ie_result, url_fields)

    if ie is not None:
        extractor_fields = {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, extractor_fields)
2014-03-23 11:06:03 -04:00
2021-08-18 17:40:32 -04:00
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    Arguments:
    ie_result  -- info dict from an extractor; handling is chosen by its
                  '_type' key ('video' when the key is absent)
    download   -- forwarded to the video / playlist processing steps
    extra_info -- dict of values added to results where missing
                  (a fresh dict is used when None)

    None is returned for a playlist whose webpage_url is already being
    processed, and when the inner extraction of a 'url_transparent'
    result yields nothing. An unknown '_type' raises Exception.
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        if ie_result.get('original_url'):
            extra_info.setdefault('original_url', ie_result['original_url'])

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: report the unresolved reference as-is
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            # ie is None when no extractor could be resolved from 'ie_key';
            # in that case no temporary id can be derived
            if ie is not None and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            # extra_info must be passed by keyword: the third positional
            # parameter of extract_info is ie_key
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # Parenthesized so that the id is used when there is no title
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Leaving the outermost playlist resets the recursion guard
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Give each legacy entry the extractor/URL fields of its parent
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
2021-03-08 21:17:21 -05:00
def _ensure_dir_exists(self, path):
    """Delegate to make_dir for path, with report_error as its error callback.

    Returns make_dir's result; callers in this file treat a falsy value
    as failure.
    """
    error_reporter = self.report_error
    return make_dir(path, error_reporter)
2021-01-16 07:40:15 -05:00
def __process_playlist(self, ie_result, download):
    """Select, filter and process the entries of a playlist result.

    The entries to handle are chosen from the 'playlist_items',
    'playliststart' and 'playlistend' params, optional playlist-level
    files (info JSON, thumbnails, description) are written, and each
    selected entry is passed through process_ie_result.

    Returns ie_result with 'entries' replaced by the processed results,
    or None when writing a playlist-level file could not proceed.
    Raises EntryNotInPlaylist when ie_result has no 'entries' or when a
    requested entry of an incomplete playlist is missing.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indexes):
            # Place each entry at its 1-based playlist index, padding the
            # gaps with None. max() takes the iterable itself: unpacking
            # it fails when there is exactly one index
            ret = [None] * max(indexes)
            for i, entry in zip(indexes, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(format):
            # Accepts comma separated indices and inclusive 'start-end' ranges
            for string_segment in format.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield item
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    # The remaining %d is filled with the number of selected entries later
    msg = (
        'Downloading %d videos' if not isinstance(ie_entries, list)
        else 'Collected %d videos; downloading %%d of them' % len(ie_entries))

    if isinstance(ie_entries, list):
        def get_entry(i):
            return ie_entries[i - 1]
    else:
        if not isinstance(ie_entries, PagedList):
            ie_entries = LazyList(ie_entries)

        def get_entry(i):
            # Fetching an entry may run extractor code, so route it through
            # the same exception handling as extraction itself
            return YoutubeDL.__handle_extraction_exceptions(
                lambda self, i: ie_entries[i - 1]
            )(self, i)

    entries = []
    for i in playlistitems or itertools.count(playliststart):
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = get_entry(i)
            if entry is None:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist()
            elif not playlistitems:
                break
        entries.append(entry)
        try:
            if entry is not None:
                # Called only so that the break_on_* conditions can stop
                # the collection early; the returned reason is ignored
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    entries = [
        ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
        for i, entry in enumerate(entries, 1)
        if entry is not None]
    n_entries = len(entries)

    if not playlistitems and (playliststart or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    if self.params.get('allow_playlist_files', True):
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0,
        }
        ie_copy.update(dict(ie_result))

        if self.params.get('writeinfojson', False):
            infofn = self.prepare_filename(ie_copy, 'pl_infojson')
            if not self._ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            else:
                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('writedescription', False):
            descfn = self.prepare_filename(ie_copy, 'pl_description')
            if not self._ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            else:
                try:
                    self.to_screen('[info] Writing playlist description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(ie_result['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write playlist description file ' + descfn)
                    return

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    # No limit configured means the playlist is never abandoned
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        if 'playlist-index' in self.params.get('compat_opts', []):
            playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result
2020-11-21 09:50:42 -05:00
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process one playlist entry under the extraction exception handling.

    Returns the value of process_ie_result for the entry.
    """
    resolved = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return resolved
2015-06-28 16:08:29 -04:00
def _build_format_filter(self, filter_spec):
    """Return a predicate testing a format dict against filter_spec.

    Two forms are recognized: a numeric comparison on one of the known
    numeric keys (the value may carry a size suffix, resolved through
    parse_filesize), and a string comparison on any key, optionally
    negated with '!'. A '?' after the operator makes the predicate return
    a truthy value for formats that lack the key.

    Raises ValueError for an unparsable numeric value and SyntaxError
    when filter_spec matches neither form.
    """
    numeric_ops = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    numeric_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
        (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
        ''' % '|'.join(map(re.escape, numeric_ops.keys())))
    match = numeric_rex.fullmatch(filter_spec)

    if match:
        raw_value = match.group('value')
        try:
            expected = int(raw_value)
        except ValueError:
            # Not a plain integer: try it as a size, with and without 'B'
            expected = parse_filesize(raw_value)
            if expected is None:
                expected = parse_filesize(raw_value + 'B')
            if expected is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, filter_spec))
        compare = numeric_ops[match.group('op')]
    else:
        string_ops = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
        }
        string_rex = re.compile(r'''(?x)\s*
            (?P<key>[a-zA-Z0-9._-]+)\s*
            (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[a-zA-Z0-9._-]+)\s*
            ''' % '|'.join(map(re.escape, string_ops.keys())))
        match = string_rex.fullmatch(filter_spec)
        if not match:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        expected = match.group('value')
        plain_op = string_ops[match.group('op')]
        if match.group('negation'):
            compare = lambda attr, value: not plain_op(attr, value)
        else:
            compare = plain_op

    field = match.group('key')
    when_missing = match.group('none_inclusive')

    def _filter(f):
        actual = f.get(field)
        if actual is None:
            # The matched '?' text (truthy) or None (falsy)
            return when_missing
        return compare(actual, expected)
    return _filter
2017-07-22 13:12:01 -04:00
def _default_format_spec(self, info_dict, download=True):
    """Pick the format selector string used when none is configured.

    'best/bestvideo+bestaudio' is chosen when an actual download will
    happen (not simulating) and merging is not possible, the video is
    live, or the default output template is '-'. Otherwise the result is
    'bestvideo+bestaudio/best' when multiple audio streams are allowed or
    the 'format-spec' compat option is set, and
    'bestvideo*+bestaudio/best' in all remaining cases.
    """

    def merging_unavailable():
        merger = FFmpegMergerPP(self)
        return not (merger.available and merger.can_merge())

    prefer_best = False
    # The merger is only probed when something will really be downloaded
    if not self.params.get('simulate') and download:
        prefer_best = (
            merging_unavailable()
            or info_dict.get('is_live', False)
            or self.outtmpl_dict['default'] == '-')

    if prefer_best:
        return 'best/bestvideo+bestaudio'

    if (self.params.get('allow_multiple_audio_streams', False)
            or 'format-spec' in self.params.get('compat_opts', [])):
        return 'bestvideo+bestaudio/best'
    return 'bestvideo*+bestaudio/best'
2017-07-22 13:12:01 -04:00
2015-06-28 16:08:29 -04:00
def build_format_selector ( self , format_spec ) :
def syntax_error ( note , start ) :
message = (
' Invalid format specification: '
' {0} \n \t {1} \n \t {2} ^ ' . format ( note , format_spec , ' ' * start [ 1 ] ) )
return SyntaxError ( message )
PICKFIRST = ' PICKFIRST '
MERGE = ' MERGE '
SINGLE = ' SINGLE '
2015-06-29 06:42:02 -04:00
GROUP = ' GROUP '
2015-06-28 16:08:29 -04:00
FormatSelector = collections . namedtuple ( ' FormatSelector ' , [ ' type ' , ' selector ' , ' filters ' ] )
Change defaults
* Enabled --ignore by default
* Disabled --video-multistreams and --audio-multistreams by default
* Changed default format selection to 'bv*+ba/b' when --audio-multistreams is disabled
* Changed default format sort order to 'res,fps,codec,size,br,asr,proto,ext,has_audio,source,format_id'
* Changed default output template to '%(title)s [%(id)s].%(ext)s'
* Enabled `--list-formats-as-table` by default
2021-01-04 11:40:47 -05:00
allow_multiple_streams = { ' audio ' : self . params . get ( ' allow_multiple_audio_streams ' , False ) ,
' video ' : self . params . get ( ' allow_multiple_video_streams ' , False ) }
Better Format Selection
* Added options: --video-multistreams, --no-video-multistreams, --audio-multistreams, --no-audio-multistreams
* New format selectors: best*, worst*, bestvideo*, bestaudio*, worstvideo*, worstaudio*
* Added b,w,v,a as alias for best, worst, video and audio respectively in format selection
* Changed video format sorting to show video only files and video+audio files together.
2020-11-05 10:35:36 -05:00
2021-05-04 11:54:00 -04:00
check_formats = self . params . get ( ' check_formats ' )
2015-06-28 16:08:29 -04:00
def _parse_filter ( tokens ) :
filter_parts = [ ]
for type , string , start , _ , _ in tokens :
if type == tokenize . OP and string == ' ] ' :
return ' ' . join ( filter_parts )
else :
filter_parts . append ( string )
2015-08-04 16:29:23 -04:00
def _remove_unused_ops ( tokens ) :
2015-11-20 12:21:46 -05:00
# Remove operators that we don't use and join them with the surrounding strings
2015-08-04 16:29:23 -04:00
# for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
ALLOWED_OPS = ( ' / ' , ' + ' , ' , ' , ' ( ' , ' ) ' )
last_string , last_start , last_end , last_line = None , None , None , None
for type , string , start , end , line in tokens :
if type == tokenize . OP and string == ' [ ' :
if last_string :
yield tokenize . NAME , last_string , last_start , last_end , last_line
last_string = None
yield type , string , start , end , line
# everything inside brackets will be handled by _parse_filter
for type , string , start , end , line in tokens :
yield type , string , start , end , line
if type == tokenize . OP and string == ' ] ' :
break
elif type == tokenize . OP and string in ALLOWED_OPS :
if last_string :
yield tokenize . NAME , last_string , last_start , last_end , last_line
last_string = None
yield type , string , start , end , line
elif type in [ tokenize . NAME , tokenize . NUMBER , tokenize . OP ] :
if not last_string :
last_string = string
last_start = start
last_end = end
else :
last_string + = string
if last_string :
yield tokenize . NAME , last_string , last_start , last_end , last_line
2015-06-30 13:45:42 -04:00
def _parse_format_selection ( tokens , inside_merge = False , inside_choice = False , inside_group = False ) :
2015-06-28 16:08:29 -04:00
selectors = [ ]
current_selector = None
for type , string , start , _ , _ in tokens :
# ENCODING is only defined in python 3.x
if type == getattr ( tokenize , ' ENCODING ' , None ) :
continue
elif type in [ tokenize . NAME , tokenize . NUMBER ] :
current_selector = FormatSelector ( SINGLE , string , [ ] )
elif type == tokenize . OP :
2015-06-30 13:45:42 -04:00
if string == ' ) ' :
if not inside_group :
# ')' will be handled by the parentheses group
tokens . restore_last_token ( )
2015-06-28 16:08:29 -04:00
break
2015-06-30 13:45:42 -04:00
elif inside_merge and string in [ ' / ' , ' , ' ] :
2015-06-29 06:42:02 -04:00
tokens . restore_last_token ( )
break
2015-06-30 13:45:42 -04:00
elif inside_choice and string == ' , ' :
tokens . restore_last_token ( )
break
elif string == ' , ' :
2015-07-10 16:46:25 -04:00
if not current_selector :
raise syntax_error ( ' " , " must follow a format selector ' , start )
2015-06-28 16:08:29 -04:00
selectors . append ( current_selector )
current_selector = None
elif string == ' / ' :
2015-08-03 17:04:11 -04:00
if not current_selector :
raise syntax_error ( ' " / " must follow a format selector ' , start )
2015-06-28 16:08:29 -04:00
first_choice = current_selector
2015-06-30 13:45:42 -04:00
second_choice = _parse_format_selection ( tokens , inside_choice = True )
2015-07-04 15:30:26 -04:00
current_selector = FormatSelector ( PICKFIRST , ( first_choice , second_choice ) , [ ] )
2015-06-28 16:08:29 -04:00
elif string == ' [ ' :
if not current_selector :
current_selector = FormatSelector ( SINGLE , ' best ' , [ ] )
format_filter = _parse_filter ( tokens )
current_selector . filters . append ( format_filter )
2015-06-29 06:42:02 -04:00
elif string == ' ( ' :
if current_selector :
raise syntax_error ( ' Unexpected " ( " ' , start )
2015-06-30 13:45:42 -04:00
group = _parse_format_selection ( tokens , inside_group = True )
current_selector = FormatSelector ( GROUP , group , [ ] )
2015-06-28 16:08:29 -04:00
elif string == ' + ' :
2015-08-04 03:07:44 -04:00
if not current_selector :
raise syntax_error ( ' Unexpected " + " ' , start )
selector_1 = current_selector
selector_2 = _parse_format_selection ( tokens , inside_merge = True )
if not selector_2 :
raise syntax_error ( ' Expected a selector ' , start )
current_selector = FormatSelector ( MERGE , ( selector_1 , selector_2 ) , [ ] )
2015-06-28 16:08:29 -04:00
else :
raise syntax_error ( ' Operator not recognized: " {0} " ' . format ( string ) , start )
elif type == tokenize . ENDMARKER :
break
if current_selector :
selectors . append ( current_selector )
return selectors
2021-04-10 10:40:30 -04:00
def _merge(formats_pair):
    """Combine two selected formats into one merged-format dict.

    ``formats_pair`` is a 2-tuple of format dicts. Either element may itself
    be the result of an earlier merge, in which case its
    ``'requested_formats'`` entries are flattened into the new result.

    Returns the single remaining format dict unchanged when only one stream
    survives filtering; otherwise returns a new dict carrying
    ``'requested_formats'``, joined ``'format'``/``'format_id'`` strings, the
    output ``'ext'`` and, when there is exactly one video and/or audio
    stream, that stream's video/audio properties.

    NOTE(review): ``allow_multiple_streams`` and ``self`` are taken from the
    enclosing scope, which is not visible in this chunk.
    """
    format_1, format_2 = formats_pair

    formats_info = []
    formats_info.extend(format_1.get('requested_formats', (format_1, )))
    formats_info.extend(format_2.get('requested_formats', (format_2, )))

    if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
        get_no_more = {'video': False, 'audio': False}
        # Build a new list instead of pop()-ing from the list being
        # enumerated: removing an element mid-iteration shifts the rest left
        # and makes the loop skip the element that follows each removal.
        kept_formats = []
        for fmt_info in formats_info:
            if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                # Carries neither audio nor video; nothing to merge
                continue
            for aud_vid in ('audio', 'video'):
                if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                    if get_no_more[aud_vid]:
                        # A stream of this kind was already accepted
                        break
                    get_no_more[aud_vid] = True
            else:
                kept_formats.append(fmt_info)
        formats_info = kept_formats

    if len(formats_info) == 1:
        return formats_info[0]

    video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
    audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

    the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
    the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

    output_ext = self.params.get('merge_output_format')
    if not output_ext:
        if the_only_video:
            output_ext = the_only_video['ext']
        elif the_only_audio and not video_fmts:
            output_ext = the_only_audio['ext']
        else:
            # Several video streams (or several audio-only streams)
            output_ext = 'mkv'

    new_dict = {
        'requested_formats': formats_info,
        'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
        'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
        'ext': output_ext,
    }

    if the_only_video:
        new_dict.update({
            'width': the_only_video.get('width'),
            'height': the_only_video.get('height'),
            'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
            'fps': the_only_video.get('fps'),
            'vcodec': the_only_video.get('vcodec'),
            'vbr': the_only_video.get('vbr'),
            'stretched_ratio': the_only_video.get('stretched_ratio'),
        })

    if the_only_audio:
        new_dict.update({
            'acodec': the_only_audio.get('acodec'),
            'abr': the_only_audio.get('abr'),
        })

    return new_dict
2021-05-04 11:54:00 -04:00
def _check_formats(formats):
    """Yield the formats from ``formats`` that pass a test download.

    When the enclosing ``check_formats`` flag is falsy every format is
    yielded untouched. Otherwise each format is test-downloaded into a
    throw-away temporary file and yielded only if that succeeds.

    NOTE(review): ``check_formats``, ``self``, ``DownloadError`` and
    ``network_exceptions`` are defined outside this chunk.
    """
    if not check_formats:
        yield from formats
        return

    for candidate in formats:
        self.to_screen('[info] Testing format %s' % candidate['format_id'])

        # Only the path is needed; the handle is closed right away so the
        # downloader can open the file itself.
        with tempfile.NamedTemporaryFile(
                suffix='.tmp', delete=False,
                dir=self.get_output_path('temp') or None) as handle:
            probe_path = handle.name

        try:
            downloaded, _ = self.dl(probe_path, candidate, test=True)
        except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
            downloaded = False
        finally:
            if os.path.exists(probe_path):
                try:
                    os.remove(probe_path)
                except OSError:
                    self.report_warning('Unable to delete temporary file "%s"' % probe_path)

        if not downloaded:
            self.to_screen('[info] Unable to download format %s. Skipping...' % candidate['format_id'])
            continue
        yield candidate
2015-06-28 16:08:29 -04:00
def _build_selector_function(selector):
    """Turn a parsed selector (or list of selectors) into a callable.

    The returned callable takes a context dict (it reads ``ctx['formats']``
    and ``ctx['incomplete_formats']``) and produces the selected formats.
    For anything other than a plain list, the selector's own filters are
    applied to a deep copy of the context before selection.

    NOTE(review): ``GROUP``, ``PICKFIRST``, ``MERGE``, ``SINGLE``, ``_merge``,
    ``_check_formats``, ``LazyList``, ``int_or_none`` and ``self`` come from
    outside this chunk.
    """
    if isinstance(selector, list):  # ,
        sub_selectors = [_build_selector_function(item) for item in selector]

        def selector_function(ctx):
            for pick in sub_selectors:
                yield from pick(ctx)
        return selector_function

    if selector.type == GROUP:  # ()
        selector_function = _build_selector_function(selector.selector)

    elif selector.type == PICKFIRST:  # /
        alternatives = [_build_selector_function(item) for item in selector.selector]

        def selector_function(ctx):
            # First alternative that yields anything wins
            for pick in alternatives:
                picked_formats = list(pick(ctx))
                if picked_formats:
                    return picked_formats
            return []

    elif selector.type == MERGE:  # +
        first_selector, second_selector = map(_build_selector_function, selector.selector)

        def selector_function(ctx):
            left = first_selector(copy.deepcopy(ctx))
            right = second_selector(copy.deepcopy(ctx))
            for pair in itertools.product(left, right):
                yield _merge(pair)

    elif selector.type == SINGLE:  # atom
        format_spec = selector.selector or 'best'

        # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
        if format_spec == 'all':
            def selector_function(ctx):
                yield from _check_formats(ctx['formats'])

        elif format_spec == 'mergeall':
            def selector_function(ctx):
                formats = list(_check_formats(ctx['formats']))
                if not formats:
                    return
                # Fold from the last format towards the first
                merged_format = formats[-1]
                for other in formats[-2::-1]:
                    merged_format = _merge((merged_format, other))
                yield merged_format

        else:
            format_fallback, format_reverse, format_idx = False, True, 1
            mobj = re.match(
                r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                format_spec)
            if mobj is None:
                if format_spec in ('mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'):  # extension
                    def filter_f(f):
                        return f.get('ext') == format_spec
                else:  # id
                    def filter_f(f):
                        return f.get('format_id') == format_spec
            else:
                format_idx = int_or_none(mobj.group('n'), default=1)
                format_reverse = mobj.group('bw')[0] == 'b'
                format_type = (mobj.group('type') or [None])[0]
                not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                format_modified = mobj.group('mod') is not None

                format_fallback = not format_type and not format_modified  # for b, w

                if format_type and format_modified:  # bv*, ba*, wv*, wa*
                    def _filter_f(f):
                        return f.get('%scodec' % format_type) != 'none'
                elif format_type:  # bv, ba, wv, wa
                    def _filter_f(f):
                        return f.get('%scodec' % not_format_type) == 'none'
                elif not format_modified:  # b, w
                    def _filter_f(f):
                        return f.get('vcodec') != 'none' and f.get('acodec') != 'none'
                else:  # b*, w*
                    def _filter_f(f):
                        return True

                def filter_f(f):
                    # Never pick a format that has neither audio nor video
                    return _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')

            def selector_function(ctx):
                formats = list(ctx['formats'])
                matches = [f for f in formats if filter_f(f)]
                if format_fallback and ctx['incomplete_formats'] and not matches:
                    # for extractors with incomplete formats (audio only (soundcloud)
                    # or video only (imgur)) best/worst will fallback to
                    # best/worst {video,audio}-only format
                    matches = formats
                step = -1 if format_reverse else 1
                candidates = LazyList(_check_formats(matches[::step]))
                try:
                    yield candidates[format_idx - 1]
                except IndexError:
                    return

    filters = [self._build_format_filter(f) for f in selector.filters]

    def final_selector(ctx):
        # Filter a private copy so the caller's context is left untouched
        ctx_copy = copy.deepcopy(ctx)
        for keep in filters:
            ctx_copy['formats'] = [f for f in ctx_copy['formats'] if keep(f)]
        return selector_function(ctx_copy)
    return final_selector
2015-01-22 18:04:05 -05:00
2015-06-28 16:08:29 -04:00
stream = io . BytesIO ( format_spec . encode ( ' utf-8 ' ) )
2015-06-29 06:42:02 -04:00
try :
2015-08-04 16:29:23 -04:00
tokens = list ( _remove_unused_ops ( compat_tokenize_tokenize ( stream . readline ) ) )
2015-06-29 06:42:02 -04:00
except tokenize . TokenError :
raise syntax_error ( ' Missing closing/opening brackets or parenthesis ' , ( 0 , len ( format_spec ) ) )
class TokenIterator(object):
    """Iterator over a token sequence that can step back one position.

    ``tokens`` is the underlying sequence and ``counter`` the index of the
    next token to hand out.
    """

    def __init__(self, tokens):
        self.tokens = tokens
        self.counter = 0

    def __iter__(self):
        return self

    def __next__(self):
        index = self.counter
        if index < len(self.tokens):
            token = self.tokens[index]
            self.counter = index + 1
            return token
        raise StopIteration()

    # Python 2 style alias
    next = __next__

    def restore_last_token(self):
        """Move the cursor back so the previous token is returned again."""
        self.counter = self.counter - 1
parsed_selector = _parse_format_selection ( iter ( TokenIterator ( tokens ) ) )
2015-06-28 16:08:29 -04:00
return _build_selector_function ( parsed_selector )
2013-10-21 07:19:58 -04:00
2015-01-24 12:52:26 -05:00
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for ``info_dict``.

    Starts from a copy of ``std_headers``, overlays the entry's own
    ``'http_headers'``, adds a ``Cookie`` header when
    ``self._calc_cookies`` returns one, and adds ``X-Forwarded-For`` from
    ``'__x_forwarded_for_ip'`` unless that header is already present.
    """
    headers = std_headers.copy()
    headers.update(info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
def _calc_cookies(self, info_dict):
    """Return the ``Cookie`` header the cookie jar produces for the entry's URL.

    A request for ``info_dict['url']`` is built only so the jar can attach
    its cookie header to it; the request itself is never sent here.
    """
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header
2015-01-24 12:52:26 -05:00
2021-06-12 11:21:00 -04:00
def _sanitize_thumbnails(self, info_dict):
    """Normalise ``info_dict['thumbnails']`` in place.

    If only a single ``'thumbnail'`` URL is present it is wrapped into a
    one-element ``'thumbnails'`` list. The list is then sorted, every entry
    gets an ``'id'``, a ``'resolution'`` (when width and height are known)
    and a sanitized ``'url'``. Unless the ``check_formats`` param is exactly
    ``False`` the stored list is a lazily filtered one that drops
    thumbnails whose URL cannot be reached.
    """
    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if not thumbnails:
        return

    def known(value, fallback):
        return fallback if value is None else value

    thumbnails.sort(key=lambda t: (
        known(t.get('preference'), -1),
        known(t.get('width'), -1),
        known(t.get('height'), -1),
        known(t.get('id'), ''),
        t.get('url')))

    def thumbnail_tester():
        test_all = bool(self.params.get('check_formats'))
        if test_all:
            to_screen = lambda msg: self.to_screen(f'[info] {msg}')
        else:
            to_screen = self.write_debug

        def test_thumbnail(t):
            # Without check_formats only thumbnails flagged for testing are probed
            if not (test_all or t.get('_test_url')):
                return True
            to_screen('Testing thumbnail %s' % t['id'])
            try:
                self.urlopen(HEADRequest(t['url']))
            except network_exceptions as err:
                to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
                    t['id'], t['url'], error_to_compat_str(err)))
                return False
            else:
                return True

        return test_thumbnail

    for index, thumb in enumerate(thumbnails):
        if thumb.get('id') is None:
            thumb['id'] = '%d' % index
        if thumb.get('width') and thumb.get('height'):
            thumb['resolution'] = '%dx%d' % (thumb['width'], thumb['height'])
        thumb['url'] = sanitize_url(thumb['url'])

    if self.params.get('check_formats') is False:
        info_dict['thumbnails'] = thumbnails
        return
    # Filter over the reversed list, then flip the lazy result back
    info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2021-05-23 07:58:15 -04:00
2013-07-02 04:08:58 -04:00
# Normalise a single-video extractor result, pick the formats to download and
# hand each chosen format to self.process_info(); returns info_dict (or None
# on the list-only early return).
# NOTE(review): this listing has lost its indentation and carries interleaved
# blame timestamps, so the nesting described in the comments below is
# inferred from the statements themselves - confirm against the real file.
def process_video_result ( self , info_dict , download = True ) :
assert info_dict . get ( ' _type ' , ' video ' ) == ' video '
2014-04-03 08:36:40 -04:00
# The extractor result must carry both an id and a title; otherwise an
# ExtractorError is raised.
if ' id ' not in info_dict :
raise ExtractorError ( ' Missing " id " field in extractor result ' )
if ' title ' not in info_dict :
2021-08-18 21:49:23 -04:00
raise ExtractorError ( ' Missing " title " field in extractor result ' ,
video_id = info_dict [ ' id ' ] , ie = info_dict [ ' extractor ' ] )
2014-04-03 08:36:40 -04:00
2017-06-08 11:53:14 -04:00
# Local helpers: warn about a wrongly-typed field and coerce it
# (strings via compat_str, numeric fields via int_or_none).
def report_force_conversion ( field , field_not , conversion ) :
self . report_warning (
' " %s " field is not %s - forcing %s conversion, there is an error in extractor '
% ( field , field_not , conversion ) )
def sanitize_string_field ( info , string_field ) :
field = info . get ( string_field )
if field is None or isinstance ( field , compat_str ) :
return
report_force_conversion ( string_field , ' a string ' , ' string ' )
info [ string_field ] = compat_str ( field )
def sanitize_numeric_fields ( info ) :
for numeric_field in self . _NUMERIC_FIELDS :
field = info . get ( numeric_field )
if field is None or isinstance ( field , compat_numeric_types ) :
continue
report_force_conversion ( numeric_field , ' numeric ' , ' int ' )
info [ numeric_field ] = int_or_none ( field )
sanitize_string_field ( info_dict , ' id ' )
sanitize_numeric_fields ( info_dict )
2016-06-08 18:34:19 -04:00
2013-07-02 04:08:58 -04:00
if ' playlist ' not in info_dict :
# It isn't part of a playlist
info_dict [ ' playlist ' ] = None
info_dict [ ' playlist_index ' ] = None
2021-05-23 07:58:15 -04:00
self . _sanitize_thumbnails ( info_dict )
2014-06-07 09:33:45 -04:00
2016-04-07 14:17:47 -04:00
# Prefer the explicit thumbnail URL (sanitized); otherwise fall back to the
# URL of the last entry of the thumbnails list.
thumbnail = info_dict . get ( ' thumbnail ' )
2021-05-23 07:58:15 -04:00
thumbnails = info_dict . get ( ' thumbnails ' )
2016-04-07 14:17:47 -04:00
if thumbnail :
info_dict [ ' thumbnail ' ] = sanitize_url ( thumbnail )
elif thumbnails :
2014-06-07 09:33:45 -04:00
info_dict [ ' thumbnail ' ] = thumbnails [ - 1 ] [ ' url ' ]
2021-07-21 11:14:18 -04:00
# display_id defaults to the id
if info_dict . get ( ' display_id ' ) is None and ' id ' in info_dict :
2014-03-03 06:06:28 -05:00
info_dict [ ' display_id ' ] = info_dict [ ' id ' ]
2021-03-14 19:22:06 -04:00
# Derive each missing date field from its timestamp counterpart
for ts_key , date_key in (
( ' timestamp ' , ' upload_date ' ) ,
( ' release_timestamp ' , ' release_date ' ) ,
) :
if info_dict . get ( date_key ) is None and info_dict . get ( ts_key ) is not None :
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
# see http://bugs.python.org/issue1646728)
try :
upload_date = datetime . datetime . utcfromtimestamp ( info_dict [ ts_key ] )
info_dict [ date_key ] = upload_date . strftime ( ' % Y % m %d ' )
except ( ValueError , OverflowError , OSError ) :
pass
2014-03-13 10:30:25 -04:00
2021-07-21 11:14:18 -04:00
# Reconcile live_status with the is_live / was_live booleans: a missing
# live_status is taken from the first truthy key (or 'not_live' when both
# are explicitly False), and keys that are None are then filled in from it.
live_keys = ( ' is_live ' , ' was_live ' )
live_status = info_dict . get ( ' live_status ' )
if live_status is None :
for key in live_keys :
if info_dict . get ( key ) is False :
continue
if info_dict . get ( key ) :
live_status = key
break
if all ( info_dict . get ( key ) is False for key in live_keys ) :
live_status = ' not_live '
if live_status :
info_dict [ ' live_status ' ] = live_status
for key in live_keys :
if info_dict . get ( key ) is None :
info_dict [ key ] = ( live_status == key )
2016-01-15 13:09:54 -05:00
# Auto generate title fields corresponding to the *_number fields when missing
# in order to always have clean titles. This is very common for TV series.
for field in ( ' chapter ' , ' season ' , ' episode ' ) :
if info_dict . get ( ' %s _number ' % field ) is not None and not info_dict . get ( field ) :
info_dict [ field ] = ' %s %d ' % ( field . capitalize ( ) , info_dict [ ' %s _number ' % field ] )
2018-05-08 11:57:52 -04:00
# Sanitize subtitle / caption URLs and fill in a missing 'ext' from the URL
for cc_kind in ( ' subtitles ' , ' automatic_captions ' ) :
cc = info_dict . get ( cc_kind )
if cc :
for _ , subtitle in cc . items ( ) :
for subtitle_format in subtitle :
if subtitle_format . get ( ' url ' ) :
subtitle_format [ ' url ' ] = sanitize_url ( subtitle_format [ ' url ' ] )
if subtitle_format . get ( ' ext ' ) is None :
subtitle_format [ ' ext ' ] = determine_ext ( subtitle_format [ ' url ' ] ) . lower ( )
automatic_captions = info_dict . get ( ' automatic_captions ' )
2015-10-04 10:33:42 -04:00
subtitles = info_dict . get ( ' subtitles ' )
2015-02-16 15:44:17 -05:00
info_dict [ ' requested_subtitles ' ] = self . process_subtitles (
2018-05-08 11:57:52 -04:00
info_dict [ ' id ' ] , subtitles , automatic_captions )
2015-02-15 12:03:41 -05:00
2013-07-02 04:08:58 -04:00
# We now pick which formats have to be downloaded
if info_dict . get ( ' formats ' ) is None :
# There's only one format available
formats = [ info_dict ]
else :
formats = info_dict [ ' formats ' ]
2021-08-24 16:48:05 -04:00
# Record whether any format is DRM-flagged, then drop such formats unless
# the allow_unplayable_formats param is set.
info_dict [ ' __has_drm ' ] = any ( f . get ( ' has_drm ' ) for f in formats )
2021-08-22 16:08:38 -04:00
if not self . params . get ( ' allow_unplayable_formats ' ) :
formats = [ f for f in formats if not f . get ( ' has_drm ' ) ]
2014-03-10 15:55:47 -04:00
if not formats :
2021-08-18 21:49:23 -04:00
self . raise_no_formats ( info_dict )
2014-03-10 15:55:47 -04:00
2017-06-23 10:18:33 -04:00
# A format is well-formed when it has a non-empty url; a bytes url is
# passed through sanitize_string_field.
def is_wellformed ( f ) :
url = f . get ( ' url ' )
2017-08-17 12:59:12 -04:00
if not url :
2017-06-23 10:18:33 -04:00
self . report_warning (
' " url " field is missing or empty - skipping format, '
' there is an error in extractor ' )
2017-08-17 12:59:12 -04:00
return False
if isinstance ( url , bytes ) :
sanitize_string_field ( f , ' url ' )
return True
2017-06-23 10:18:33 -04:00
# Filter out malformed formats for better extraction robustness
formats = list ( filter ( is_wellformed , formats ) )
2015-05-30 06:04:44 -04:00
# Groups formats by format_id so duplicated ids can be disambiguated below
formats_dict = { }
2013-07-02 04:08:58 -04:00
# We check that all the formats have the format and format_id fields
2014-03-10 15:55:47 -04:00
for i , format in enumerate ( formats ) :
2017-06-08 11:53:14 -04:00
sanitize_string_field ( format , ' format_id ' )
sanitize_numeric_fields ( format )
2016-03-26 09:37:41 -04:00
format [ ' url ' ] = sanitize_url ( format [ ' url ' ] )
2017-08-12 06:14:11 -04:00
if not format . get ( ' format_id ' ) :
2013-07-14 11:31:52 -04:00
format [ ' format_id ' ] = compat_str ( i )
2016-02-10 10:16:58 -05:00
else :
# Sanitize format_id from characters used in format selector expression
2017-01-02 07:08:07 -05:00
format [ ' format_id ' ] = re . sub ( r ' [ \ s,/+ \ [ \ ]()] ' , ' _ ' , format [ ' format_id ' ] )
2015-05-30 06:04:44 -04:00
format_id = format [ ' format_id ' ]
if format_id not in formats_dict :
formats_dict [ format_id ] = [ ]
formats_dict [ format_id ] . append ( format )
# Make sure all formats have unique format_id
for format_id , ambiguous_formats in formats_dict . items ( ) :
if len ( ambiguous_formats ) > 1 :
for i , format in enumerate ( ambiguous_formats ) :
format [ ' format_id ' ] = ' %s - %d ' % ( format_id , i )
# Fill in per-format defaults: display name, extension, protocol and headers
for i , format in enumerate ( formats ) :
2013-10-21 08:09:38 -04:00
if format . get ( ' format ' ) is None :
2014-01-04 19:52:03 -05:00
format [ ' format ' ] = ' {id} - {res} {note} ' . format (
2013-10-21 08:09:38 -04:00
id = format [ ' format_id ' ] ,
res = self . format_resolution ( format ) ,
2021-08-07 07:20:46 -04:00
note = format_field ( format , ' format_note ' , ' ( %s ) ' ) ,
2013-10-21 08:09:38 -04:00
)
2013-10-28 06:28:02 -04:00
# Automatically determine file extension if missing
2016-08-20 21:07:26 -04:00
if format . get ( ' ext ' ) is None :
2014-04-03 02:55:38 -04:00
format [ ' ext ' ] = determine_ext ( format [ ' url ' ] ) . lower ( )
2016-01-15 23:10:28 -05:00
# Automatically determine protocol if missing (useful for format
# selection purposes)
2017-01-14 18:09:32 -05:00
if format . get ( ' protocol ' ) is None :
2016-01-15 23:10:28 -05:00
format [ ' protocol ' ] = determine_protocol ( format )
2015-01-24 12:52:26 -05:00
# Add HTTP headers, so that external programs can use them from the
# json output
full_format_info = info_dict . copy ( )
full_format_info . update ( format )
format [ ' http_headers ' ] = self . _calc_headers ( full_format_info )
2017-02-04 09:06:07 -05:00
# Remove private housekeeping stuff
if ' __x_forwarded_for_ip ' in info_dict :
del info_dict [ ' __x_forwarded_for_ip ' ]
2013-07-02 04:08:58 -04:00
2013-12-24 06:25:22 -05:00
# TODO Central sorting goes here
2013-07-08 06:10:47 -04:00
2021-08-22 16:08:38 -04:00
if not formats or formats [ 0 ] is not info_dict :
2013-12-23 04:23:13 -05:00
# only set the 'formats' fields if the original info_dict list them
# otherwise we end up with a circular reference, the first (and unique)
2014-01-25 06:02:43 -05:00
# element in the 'formats' field in info_dict is info_dict itself,
2016-01-10 10:17:47 -05:00
# which can't be exported to json
2013-12-23 04:23:13 -05:00
info_dict [ ' formats ' ] = formats
2021-05-18 14:25:32 -04:00
# pre_process returns a pair; only its first element is kept as the new info_dict
info_dict , _ = self . pre_process ( info_dict )
2021-08-06 20:01:51 -04:00
# Listing options: print thumbnails / formats / subtitles when requested
if self . params . get ( ' list_thumbnails ' ) :
self . list_thumbnails ( info_dict )
if self . params . get ( ' listformats ' ) :
2021-08-13 16:21:47 -04:00
if not info_dict . get ( ' formats ' ) and not info_dict . get ( ' url ' ) :
2021-08-22 16:08:38 -04:00
self . to_screen ( ' %s has no formats ' % info_dict [ ' id ' ] )
else :
self . list_formats ( info_dict )
2021-08-06 20:01:51 -04:00
if self . params . get ( ' listsubtitles ' ) :
if ' automatic_captions ' in info_dict :
self . list_subtitles (
info_dict [ ' id ' ] , automatic_captions , ' automatic captions ' )
self . list_subtitles ( info_dict [ ' id ' ] , subtitles , ' subtitles ' )
# list_only is set when 'simulate' is None and at least one listing option is on
list_only = self . params . get ( ' simulate ' ) is None and (
self . params . get ( ' list_thumbnails ' ) or self . params . get ( ' listformats ' ) or self . params . get ( ' listsubtitles ' ) )
2021-07-02 15:45:01 -04:00
if list_only :
2021-08-06 20:01:51 -04:00
# Without this printing, -F --print-json will not work
2021-07-02 15:45:01 -04:00
self . __forced_printings ( info_dict , self . prepare_filename ( info_dict ) , incomplete = True )
2013-12-18 15:24:39 -05:00
return
2021-06-11 09:43:22 -04:00
# Use the pre-built selector if there is one, otherwise build one from the
# default format spec.
format_selector = self . format_selector
if format_selector is None :
2017-07-22 13:12:01 -04:00
req_format = self . _default_format_spec ( info_dict , download = download )
2021-05-14 03:45:29 -04:00
self . write_debug ( ' Default format spec: %s ' % req_format )
2021-06-11 09:43:22 -04:00
format_selector = self . build_format_selector ( req_format )
2016-07-15 13:55:43 -04:00
# While in format selection we may need to have an access to the original
# format set in order to calculate some metrics or do some processing.
# For now we need to be able to guess whether original formats provided
# by extractor are incomplete or not (i.e. whether extractor provides only
# video-only or audio-only formats) for proper formats selection for
# extractors with such incomplete formats (see
2019-03-09 07:14:41 -05:00
# https://github.com/ytdl-org/youtube-dl/pull/5556).
2016-07-15 13:55:43 -04:00
# Since formats may be filtered during format selection and may not match
# the original formats the results may be incorrect. Thus original formats
# or pre-calculated metrics should be passed to format selection routines
# as well.
# We will pass a context object containing all necessary additional data
# instead of just formats.
# This fixes incorrect format selection issue (see
2019-03-09 07:14:41 -05:00
# https://github.com/ytdl-org/youtube-dl/issues/10083).
2016-07-15 14:18:05 -04:00
incomplete_formats = (
2016-07-15 13:55:43 -04:00
# All formats are video-only or
2019-05-10 16:56:22 -04:00
all ( f . get ( ' vcodec ' ) != ' none ' and f . get ( ' acodec ' ) == ' none ' for f in formats )
2016-07-15 13:55:43 -04:00
# all formats are audio-only
2019-05-10 16:56:22 -04:00
or all ( f . get ( ' vcodec ' ) == ' none ' and f . get ( ' acodec ' ) != ' none ' for f in formats ) )
2016-07-15 13:55:43 -04:00
ctx = {
' formats ' : formats ,
' incomplete_formats ' : incomplete_formats ,
}
formats_to_download = list ( format_selector ( ctx ) )
2013-07-02 04:08:58 -04:00
# Nothing selected: raise unless ignore_no_formats_error is set, in which
# case warn and still run process_info on a copy of info_dict.
if not formats_to_download :
2021-04-16 20:09:58 -04:00
if not self . params . get ( ' ignore_no_formats_error ' ) :
2021-08-18 21:49:23 -04:00
raise ExtractorError ( ' Requested format is not available ' , expected = True ,
video_id = info_dict [ ' id ' ] , ie = info_dict [ ' extractor ' ] )
2021-04-16 20:09:58 -04:00
else :
self . report_warning ( ' Requested format is not available ' )
2021-06-23 17:11:02 -04:00
# Process what we can, even without any available formats.
self . process_info ( dict ( info_dict ) )
2021-04-16 20:09:58 -04:00
elif download :
self . to_screen (
2021-05-18 14:20:59 -04:00
' [info] %s : Downloading %d format(s): %s ' % (
info_dict [ ' id ' ] , len ( formats_to_download ) ,
" , " . join ( [ f [ ' format_id ' ] for f in formats_to_download ] ) ) )
2021-04-16 20:09:58 -04:00
# Each selected format is overlaid on a shallow copy of info_dict and processed
for fmt in formats_to_download :
2013-07-02 04:08:58 -04:00
new_info = dict ( info_dict )
2021-05-18 14:25:32 -04:00
# Save a reference to the original info_dict so that it can be modified in process_info if needed
new_info [ ' __original_infodict ' ] = info_dict
2021-04-16 20:09:58 -04:00
new_info . update ( fmt )
2013-07-02 04:08:58 -04:00
self . process_info ( new_info )
# We update the info dict with the best quality format (backwards compatibility)
2021-04-16 20:09:58 -04:00
if formats_to_download :
info_dict . update ( formats_to_download [ - 1 ] )
2013-07-02 04:08:58 -04:00
return info_dict
2015-02-22 05:37:27 -05:00
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    ``normal_subtitles`` and ``automatic_captions`` map a language code to a
    list of subtitle-format dicts (each read through its ``'ext'`` key).
    Normal subtitles are considered only with the ``writesubtitles`` param,
    automatic captions only with ``writeautomaticsub`` and only for
    languages without a normal subtitle.

    Languages are chosen, in priority order, by ``allsubtitles``, by the
    ``subtitleslangs`` patterns (regexes anchored at the end; ``'all'``
    selects everything, a leading ``'-'`` removes matches), else ``'en'``,
    else the first available language. Patterns are applied in order and
    the resulting languages keep a deterministic order.

    Returns ``None`` when nothing is available, otherwise a dict mapping
    each selected language to the chosen format dict. Languages whose
    format list is missing or empty are skipped with a warning.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            available_subs.setdefault(lang, cap_info)

    # Empty also covers "neither writesubtitles nor writeautomaticsub set"
    if not available_subs:
        return None

    all_sub_langs = list(available_subs)
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # A list (not a set) so that the output order does not depend on
        # string hash randomisation.
        requested_langs = []
        for lang_re in self.params.get('subtitleslangs'):
            if lang_re == 'all':
                discard, matched = False, all_sub_langs
            else:
                # startswith() instead of [0]: an empty pattern must not crash
                discard = lang_re.startswith('-')
                if discard:
                    lang_re = lang_re[1:]
                matcher = re.compile(lang_re + '$').match
                matched = [lang for lang in all_sub_langs if matcher(lang)]
            if discard:
                requested_langs = [lang for lang in requested_langs if lang not in matched]
            else:
                for lang in matched:
                    if lang not in requested_langs:
                        requested_langs.append(lang)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [all_sub_langs[0]]

    if requested_langs:
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # "not formats" also catches an empty list, which would otherwise
        # fail on formats[-1] below
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = [sub for sub in formats if sub['ext'] == ext]
            if matches:
                f = matches[-1]
                break
        else:
            # No preference matched: fall back to the last listed format
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
2019-09-24 15:08:46 -04:00
def __forced_printings(self, info_dict, filename, incomplete):
    """Write to stdout every piece of information requested via ``force*`` params.

    Works on a shallow copy of ``info_dict`` so the caller's dict is not
    given the helper keys ``filename`` and ``urls``.  ``incomplete`` relaxes
    the mandatory fields: when it is true, a mandatory field that is missing
    is silently skipped instead of being looked up unconditionally.
    """
    info_dict = info_dict.copy()

    def is_forced(field):
        return self.params.get('force%s' % field, False)

    def print_mandatory(field, actual_field=None):
        key = field if actual_field is None else actual_field
        if not is_forced(field):
            return
        if incomplete and info_dict.get(key) is None:
            return
        self.to_stdout(info_dict[key])

    def print_optional(field):
        if is_forced(field) and info_dict.get(field) is not None:
            self.to_stdout(info_dict[field])

    if filename is not None:
        info_dict['filename'] = filename

    if info_dict.get('requested_formats') is not None:
        # For RTMP URLs, also include the playpath
        info_dict['urls'] = '\n'.join(
            fmt['url'] + fmt.get('play_path', '')
            for fmt in info_dict['requested_formats'])
    elif 'url' in info_dict:
        info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    if self.params.get('forceprint') or self.params.get('forcejson'):
        self.post_extract(info_dict)

    for tmpl in self.params.get('forceprint', []):
        # A bare field name is shorthand for the template "%(field)s"
        if re.match(r'\w+$', tmpl):
            tmpl = f'%({tmpl})s'
        tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
        self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)

    for field in ('title', 'id'):
        print_mandatory(field)
    print_mandatory('url', 'urls')
    for field in ('thumbnail', 'description', 'filename'):
        print_optional(field)

    duration = info_dict.get('duration')
    if self.params.get('forceduration') and duration is not None:
        self.to_stdout(formatSeconds(duration))
    print_mandatory('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2019-09-24 15:08:46 -04:00
2021-05-04 11:54:00 -04:00
def dl(self, name, info, subtitle=False, test=False):
    """Pick a suitable downloader for ``info`` and run it, writing to ``name``.

    With ``test=True`` a fixed, minimal parameter set is used instead of
    ``self.params`` and no progress hooks are attached.  Returns whatever the
    downloader's ``download()`` returns.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)

    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)
        format_urls = [f['url'] for f in info.get('requested_formats', [])]
        urls = '", "'.join(format_urls or [info['url']])
        self.write_debug('Invoking downloader on "%s"' % urls)

    # Work on a copy so that the computed headers do not leak into the caller's dict
    new_info = dict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
2013-06-18 16:14:21 -04:00
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Performs, in order: the max-downloads and match-filter checks, forced
    printings, writing of the side files (description, annotations,
    subtitles, info JSON, thumbnails, internet shortcuts), the ``before_dl``
    pre-processing, the download itself (or the move-only step when
    ``skip_download`` is set), fixups, post-processing, post hooks and
    finally recording in the download archive.

    Most failures are reported through ``report_error``/``report_warning``
    and end the method early with ``None``.

    Raises:
        MaxDownloadsReached: when the download limit is (or becomes) reached.
        UnavailableVideoError: when the download fails with an OS/IO error.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    if self.params.get('simulate'):
        if self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)

        # Do nothing else if in simulate mode
        return

    if full_filename is None:
        return

    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self.params.get('writedescription', False):
        descfn = self.prepare_filename(info_dict, 'description')
        if not self._ensure_dir_exists(encodeFilename(descfn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(
                self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                sub_info['filepath'] = sub_filename
                files_to_move[sub_filename] = sub_filename_final
            else:
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                if sub_info.get('data') is not None:
                    try:
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/ytdl-org/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_info['data'])
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
                        return
                else:
                    try:
                        self.dl(sub_filename, sub_info.copy(), subtitle=True)
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err)))
                        continue

    if self.params.get('writeinfojson', False):
        infofn = self.prepare_filename(info_dict, 'infojson')
        if not self._ensure_dir_exists(encodeFilename(infofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video metadata is already present')
        else:
            self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.sanitize_info(info_dict, self.params.get('clean_infojson', True)), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write video metadata to JSON file ' + infofn)
                return
        info_dict['__infojson_filename'] = infofn

    for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
        thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
        thumb_filename = replace_extension(
            self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
        files_to_move[thumb_filename_temp] = thumb_filename

    # Write internet shortcut files
    url_link = webloc_link = desktop_link = False
    if self.params.get('writelink', False):
        if sys.platform == "darwin":  # macOS.
            webloc_link = True
        elif sys.platform.startswith("linux"):
            desktop_link = True
        else:  # if sys.platform in ['win32', 'cygwin']:
            url_link = True
    if self.params.get('writeurllink', False):
        url_link = True
    if self.params.get('writewebloclink', False):
        webloc_link = True
    if self.params.get('writedesktoplink', False):
        desktop_link = True

    if url_link or webloc_link or desktop_link:
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return
        ascii_url = iri_to_uri(info_dict['webpage_url'])

    def _write_link_file(extension, template, newline, embed_filename):
        """Write one shortcut file; return False only when writing failed."""
        linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
        # Skip only when overwriting is disabled, exactly like every other
        # side file above (the condition used to be inverted here)
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen('[info] Internet shortcut is already present')
        else:
            try:
                self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                    template_vars = {'url': ascii_url}
                    if embed_filename:
                        template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                    linkfile.write(template % template_vars)
            except (OSError, IOError):
                self.report_error('Cannot write internet shortcut ' + linkfn)
                return False
        return True

    if url_link:
        if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
            return
    if webloc_link:
        if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
            return
    if desktop_link:
        if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
            return

    try:
        info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    must_record_download_archive = False
    if self.params.get('skip_download', False):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_file(*filepaths):
                """Return an already-downloaded file to reuse, or None.

                When overwriting is requested, the found files are deleted
                and None is returned so that the download starts afresh.
                """
                ext = info_dict.get('ext')
                final_ext = self.params.get('final_ext', ext)
                existing_files = []
                for file in orderedSet(filepaths):
                    if final_ext != ext:
                        converted = replace_extension(file, final_ext, ext)
                        if os.path.exists(encodeFilename(converted)):
                            existing_files.append(converted)
                    if os.path.exists(encodeFilename(file)):
                        existing_files.append(file)

                if not existing_files or self.params.get('overwrites', False):
                    for file in orderedSet(existing_files):
                        self.report_file_delete(file)
                        os.remove(encodeFilename(file))
                    return None

                info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                return existing_files[0]

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [fmt for fmt in formats if fmt.get('vcodec') != 'none']
                    audio_formats = [fmt for fmt in formats if fmt.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(fmt.get('ext') for fmt in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                    info_dict['ext'] = 'mkv'
                    self.report_warning(
                        'Requested formats are incompatible for merge and will be merged into mkv.')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return '%s.%s' % (filename_wo_ext, ext)

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                _protocols = set(determine_protocol(f) for f in requested_formats)
                if len(_protocols) == 1:  # All requested formats have same protocol
                    info_dict['protocol'] = _protocols.pop()
                directly_mergable = FFmpegFD.can_merge_formats(info_dict, self.params)
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif (directly_mergable and get_suitable_downloader(
                        info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD):
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    downloaded = []
                    merger = FFmpegMergerPP(self)
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        self.report_warning(
                            'You have requested merging of multiple formats but ffmpeg is not installed. '
                            'The formats won\'t be merged.')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if directly_mergable
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success
                    if merger.available and not self.params.get('allow_unplayable_formats'):
                        info_dict['__postprocessors'].append(merger)
                        info_dict['__files_to_merge'] = downloaded
                        # Even if there were no downloads, it is being merged only now
                        info_dict['__real_download'] = True
                    else:
                        for file in downloaded:
                            files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':

            def fixup():
                """Queue the ffmpeg fixup postprocessors (or just warn), as per the 'fixup' param."""
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = False
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not cndn:
                        return
                    if not do_fixup:
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(
                    stretched_ratio not in (1, None),
                    f'Non-uniform pixel ratio {stretched_ratio}',
                    FFmpegFixupStretchedPP)

                ffmpeg_fixup(
                    (info_dict.get('requested_formats') is None
                     and info_dict.get('container') == 'm4a_dash'
                     and info_dict.get('ext') == 'm4a'),
                    'writing DASH m4a. Only some players support this container',
                    FFmpegFixupM4aPP)

                downloader = (get_suitable_downloader(info_dict, self.params).__name__
                              if 'protocol' in info_dict else None)
                ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                info_dict = self.post_process(dl_filename, info_dict, files_to_move)
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            must_record_download_archive = True

    if must_record_download_archive or self.params.get('force_write_download_archive', False):
        self.record_download_archive(info_dict)
    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
2013-06-18 16:14:21 -04:00
def download ( self , url_list ) :
""" Download a given list of URLs. """
2021-02-03 08:36:09 -05:00
outtmpl = self . outtmpl_dict [ ' default ' ]
2019-05-10 16:56:22 -04:00
if ( len ( url_list ) > 1
and outtmpl != ' - '
and ' % ' not in outtmpl
and self . params . get ( ' max_downloads ' ) != 1 ) :
2014-04-30 04:02:03 -04:00
raise SameFileError ( outtmpl )
2013-06-18 16:14:21 -04:00
for url in url_list :
try :
2014-11-23 14:41:03 -05:00
# It also downloads the videos
2015-06-12 16:05:21 -04:00
res = self . extract_info (
url , force_generic_extractor = self . params . get ( ' force_generic_extractor ' , False ) )
2013-06-18 16:14:21 -04:00
except UnavailableVideoError :
2014-01-04 19:52:03 -05:00
self . report_error ( ' unable to download video ' )
2013-06-18 16:14:21 -04:00
except MaxDownloadsReached :
2021-08-15 04:12:23 -04:00
self . to_screen ( ' [info] Maximum number of downloads reached ' )
2021-01-12 20:01:01 -05:00
raise
except ExistingVideoReached :
2021-08-15 04:12:23 -04:00
self . to_screen ( ' [info] Encountered a video that is already in the archive, stopping due to --break-on-existing ' )
2021-01-12 20:01:01 -05:00
raise
except RejectedVideoReached :
2021-08-15 04:12:23 -04:00
self . to_screen ( ' [info] Encountered a video that did not match filter, stopping due to --break-on-reject ' )
2013-06-18 16:14:21 -04:00
raise
2014-10-24 18:30:57 -04:00
else :
if self . params . get ( ' dump_single_json ' , False ) :
2021-02-28 09:56:08 -05:00
self . post_extract ( res )
2021-08-07 11:46:55 -04:00
self . to_stdout ( json . dumps ( self . sanitize_info ( res ) ) )
2013-06-18 16:14:21 -04:00
return self . _download_retcode
2013-11-22 08:57:53 -05:00
def download_with_info_file ( self , info_filename ) :
2015-03-01 05:46:57 -05:00
with contextlib . closing ( fileinput . FileInput (
[ info_filename ] , mode = ' r ' ,
openhook = fileinput . hook_encoded ( ' utf-8 ' ) ) ) as f :
# FileInput doesn't have a read method, we can't call json.load
2021-08-04 18:07:16 -04:00
info = self . sanitize_info ( json . loads ( ' \n ' . join ( f ) ) , self . params . get ( ' clean_infojson ' , True ) )
2013-12-03 14:16:52 -05:00
try :
self . process_ie_result ( info , download = True )
2021-06-25 12:37:04 -04:00
except ( DownloadError , EntryNotInPlaylist , ThrottledDownload ) :
2013-12-03 14:16:52 -05:00
webpage_url = info . get ( ' webpage_url ' )
if webpage_url is not None :
2014-01-04 19:52:03 -05:00
self . report_warning ( ' The info failed to download, trying with " %s " ' % webpage_url )
2013-12-03 14:16:52 -05:00
return self . download ( [ webpage_url ] )
else :
raise
return self . _download_retcode
2013-11-22 08:57:53 -05:00
2015-04-30 14:44:34 -04:00
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    '''Sanitize the infodict for converting to json

    Returns a filtered copy in which list-like containers become plain
    lists.  '__original_infodict' is always dropped.  With
    remove_private_keys, keys starting with '_' (except '_type'), a fixed
    set of internal keys and all empty values are dropped as well.

    NOTE: 'epoch' is added to the passed info_dict itself if it is missing.
    Returns None unchanged when info_dict is None.
    '''
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    # Always remove this since this may contain a copy of the entire dict
    remove_keys = {'__original_infodict'}
    # Always keep this to facilitate load-info-json.
    # This must be a flat list of keys: a stray trailing comma used to turn
    # it into a tuple holding a list, so '_type' was never matched and got
    # stripped together with the other private keys
    keep_keys = ['_type']
    if remove_private_keys:
        remove_keys |= {
            'requested_formats', 'requested_subtitles', 'requested_entries',
            'filepath', 'entries', 'original_url', 'playlist_autonumber',
        }
        empty_values = (None, {}, [], set(), tuple())

        def reject(k, v):
            return k not in keep_keys and (
                k.startswith('_') or k in remove_keys or v in empty_values)
    else:
        def reject(k, v):
            return k in remove_keys

    def filter_fn(obj):
        if isinstance(obj, (LazyList, list, tuple, set)):
            return list(map(filter_fn, obj))
        if not isinstance(obj, dict):
            return obj
        return dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v))

    return filter_fn(info_dict)
2015-04-30 14:44:34 -04:00
2021-08-04 18:07:16 -04:00
@staticmethod
def filter_requested_info ( info_dict , actually_filter = True ) :
''' Alias of sanitize_info for backward compatibility '''
return YoutubeDL . sanitize_info ( info_dict , actually_filter )
2021-03-18 11:24:53 -04:00
def run_pp ( self , pp , infodict ) :
2021-01-26 05:20:20 -05:00
files_to_delete = [ ]
2021-03-18 11:24:53 -04:00
if ' __files_to_move ' not in infodict :
infodict [ ' __files_to_move ' ] = { }
2021-01-30 07:07:05 -05:00
files_to_delete , infodict = pp . run ( infodict )
2021-01-26 05:20:20 -05:00
if not files_to_delete :
2021-03-18 11:24:53 -04:00
return infodict
2021-01-26 05:20:20 -05:00
if self . params . get ( ' keepvideo ' , False ) :
for f in files_to_delete :
2021-03-18 11:24:53 -04:00
infodict [ ' __files_to_move ' ] . setdefault ( f , ' ' )
2021-01-26 05:20:20 -05:00
else :
for old_filename in set ( files_to_delete ) :
self . to_screen ( ' Deleting original file %s (pass -k to keep) ' % old_filename )
try :
os . remove ( encodeFilename ( old_filename ) )
except ( IOError , OSError ) :
self . report_warning ( ' Unable to remove downloaded original file ' )
2021-03-18 11:24:53 -04:00
if old_filename in infodict [ ' __files_to_move ' ] :
del infodict [ ' __files_to_move ' ] [ old_filename ]
return infodict
2021-01-26 05:20:20 -05:00
2021-02-28 09:56:08 -05:00
@staticmethod
def post_extract ( info_dict ) :
def actual_post_extract ( info_dict ) :
if info_dict . get ( ' _type ' ) in ( ' playlist ' , ' multi_video ' ) :
for video_dict in info_dict . get ( ' entries ' , { } ) :
2021-04-01 04:46:10 -04:00
actual_post_extract ( video_dict or { } )
2021-02-28 09:56:08 -05:00
return
2021-05-18 14:20:59 -04:00
post_extractor = info_dict . get ( ' __post_extractor ' ) or ( lambda : { } )
2021-05-18 14:25:32 -04:00
extra = post_extractor ( ) . items ( )
info_dict . update ( extra )
2021-05-18 14:20:59 -04:00
info_dict . pop ( ' __post_extractor ' , None )
2021-02-28 09:56:08 -05:00
2021-05-18 14:25:32 -04:00
original_infodict = info_dict . get ( ' __original_infodict ' ) or { }
original_infodict . update ( extra )
original_infodict . pop ( ' __post_extractor ' , None )
2021-04-01 04:46:10 -04:00
actual_post_extract ( info_dict or { } )
2021-02-28 09:56:08 -05:00
2021-04-10 18:18:07 -04:00
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under key on a copy of ie_info.

    Returns a tuple (info, files_to_move): the resulting info dict with the
    internal '__files_to_move' entry stripped, and that entry's value.
    """
    info = dict(ie_info, __files_to_move=files_to_move or {})
    for pp in self._pps[key]:
        info = self.run_pp(pp, info)
    pending_moves = info.pop('__files_to_move', None)
    return info, pending_moves
2021-01-26 05:20:20 -05:00
2021-03-18 11:24:53 -04:00
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Works on a copy of ie_info with 'filepath' set to filename.  The
    postprocessors from ie_info['__postprocessors'] and the 'post_process'
    group run first, then MoveFilesAfterDownloadPP, and finally the
    'after_move' group.  Returns the resulting info dict without the
    internal '__files_to_move' entry.
    """
    info = dict(ie_info, filepath=filename, __files_to_move=files_to_move or {})

    before_move = ie_info.get('__postprocessors', []) + self._pps['post_process']
    for pp in before_move:
        info = self.run_pp(pp, info)

    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    # The mapping is only meaningful until the move step has run
    del info['__files_to_move']

    for pp in self._pps['after_move']:
        info = self.run_pp(pp, info)
    return info
2013-10-05 22:27:09 -04:00
2013-11-25 09:46:54 -05:00
def _make_archive_id ( self , info_dict ) :
2019-02-01 17:44:31 -05:00
video_id = info_dict . get ( ' id ' )
if not video_id :
return
2013-11-25 09:46:54 -05:00
# Future-proof against any change in case
# and backwards compatibility with prior versions
2019-02-01 17:44:31 -05:00
extractor = info_dict . get ( ' extractor_key ' ) or info_dict . get ( ' ie_key ' ) # key in a playlist
2013-11-22 16:46:46 -05:00
if extractor is None :
2019-02-07 13:08:48 -05:00
url = str_or_none ( info_dict . get ( ' url ' ) )
if not url :
return
2019-02-01 17:44:31 -05:00
# Try to find matching extractor for the URL and take its ie_key
2021-08-22 19:56:45 -04:00
for ie_key , ie in self . _ies . items ( ) :
2019-02-07 13:08:48 -05:00
if ie . suitable ( url ) :
2021-08-22 19:56:45 -04:00
extractor = ie_key
2019-02-01 17:44:31 -05:00
break
else :
return
2021-01-21 07:06:42 -05:00
return ' %s %s ' % ( extractor . lower ( ) , video_id )
2013-11-25 09:46:54 -05:00
def in_download_archive(self, info_dict):
    """Return whether info_dict's archive id is recorded in self.archive.

    Always False when the 'download_archive' param is unset or when no
    archive id can be built for info_dict.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if not archive_id:
        return False  # Incomplete video information

    return archive_id in self.archive
2013-10-05 22:27:09 -04:00
def record_download_archive(self, info_dict):
    """Append info_dict's archive id to the download archive.

    Does nothing when the 'download_archive' param is unset.  Otherwise the
    id is written as one line to that file and added to self.archive.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return

    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
2013-07-02 04:08:58 -04:00
2013-10-21 08:09:38 -04:00
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    Formats whose vcodec is 'none' are reported as 'images' (acodec is
    'none' as well) or 'audio only'.  Otherwise an explicit 'resolution'
    wins, followed by whatever can be built from 'width' and 'height';
    default is returned when neither dimension is available.
    """
    if format.get('vcodec') == 'none':
        return 'images' if format.get('acodec') == 'none' else 'audio only'

    if format.get('resolution') is not None:
        return format['resolution']

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default
2014-04-29 20:02:41 -04:00
def _format_note ( self , fdict ) :
res = ' '
if fdict . get ( ' ext ' ) in [ ' f4f ' , ' f4m ' ] :
res + = ' (unsupported) '
2016-01-01 07:28:45 -05:00
if fdict . get ( ' language ' ) :
if res :
res + = ' '
2016-03-20 12:01:45 -04:00
res + = ' [ %s ] ' % fdict [ ' language ' ]
2014-04-29 20:02:41 -04:00
if fdict . get ( ' format_note ' ) is not None :
res + = fdict [ ' format_note ' ] + ' '
if fdict . get ( ' tbr ' ) is not None :
res + = ' %4d k ' % fdict [ ' tbr ' ]
if fdict . get ( ' container ' ) is not None :
if res :
res + = ' , '
res + = ' %s container ' % fdict [ ' container ' ]
2019-05-10 16:56:22 -04:00
if ( fdict . get ( ' vcodec ' ) is not None
and fdict . get ( ' vcodec ' ) != ' none ' ) :
2014-04-29 20:02:41 -04:00
if res :
res + = ' , '
res + = fdict [ ' vcodec ' ]
2013-11-15 19:08:43 -05:00
if fdict . get ( ' vbr ' ) is not None :
2014-04-29 20:02:41 -04:00
res + = ' @ '
elif fdict . get ( ' vbr ' ) is not None and fdict . get ( ' abr ' ) is not None :
res + = ' video@ '
if fdict . get ( ' vbr ' ) is not None :
res + = ' %4d k ' % fdict [ ' vbr ' ]
2014-10-30 04:34:13 -04:00
if fdict . get ( ' fps ' ) is not None :
2016-03-09 14:03:18 -05:00
if res :
res + = ' , '
res + = ' %s fps ' % fdict [ ' fps ' ]
2014-04-29 20:02:41 -04:00
if fdict . get ( ' acodec ' ) is not None :
if res :
res + = ' , '
if fdict [ ' acodec ' ] == ' none ' :
res + = ' video only '
else :
res + = ' %-5s ' % fdict [ ' acodec ' ]
elif fdict . get ( ' abr ' ) is not None :
if res :
res + = ' , '
res + = ' audio '
if fdict . get ( ' abr ' ) is not None :
res + = ' @ %3d k ' % fdict [ ' abr ' ]
if fdict . get ( ' asr ' ) is not None :
res + = ' ( %5d Hz) ' % fdict [ ' asr ' ]
if fdict . get ( ' filesize ' ) is not None :
if res :
res + = ' , '
res + = format_bytes ( fdict [ ' filesize ' ] )
2014-07-21 06:02:44 -04:00
elif fdict . get ( ' filesize_approx ' ) is not None :
if res :
res + = ' , '
res + = ' ~ ' + format_bytes ( fdict [ ' filesize_approx ' ] )
2014-04-29 20:02:41 -04:00
return res
2013-11-15 19:08:43 -05:00
2014-04-29 20:02:41 -04:00
def list_formats(self, info_dict):
    """Print the table of available formats for info_dict.

    Falls back to treating info_dict itself as the only format when it has
    no 'formats' key.  The detailed table is used unless 'list-formats' is
    among the 'compat_opts' param or the 'listformats_table' param is
    False; otherwise the four-column legacy layout is printed.
    """
    formats = info_dict.get('formats', [info_dict])
    new_format = (
        'list-formats' not in self.params.get('compat_opts', [])
        and self.params.get('listformats_table', True) is not False)

    # Formats with a preference below -1000 are left out of the listing
    shown = [f for f in formats if f.get('preference') is None or f['preference'] >= -1000]

    if new_format:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                format_field(f, 'fps', '%d'),
                '|',
                format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                format_field(f, 'tbr', '%4dk'),
                shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                '|',
                format_field(f, 'vcodec', default='unknown').replace('none', ''),
                format_field(f, 'vbr', '%4dk'),
                format_field(f, 'acodec', default='unknown').replace('none', ''),
                format_field(f, 'abr', '%3dk'),
                format_field(f, 'asr', '%5dHz'),
                ', '.join(filter(None, (
                    'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
                    format_field(f, 'language', '[%s]'),
                    format_field(f, 'format_note'),
                    format_field(f, 'container', ignore=(None, f.get('ext'))),
                ))),
            ] for f in shown]
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
                       '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
    else:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f),
            ] for f in shown]
        header_line = ['format code', 'extension', 'resolution', 'note']

    self.to_screen('[info] Available formats for %s:' % info_dict['id'])
    rendered = render_table(
        header_line, table,
        delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format)
    self.to_stdout(rendered)
2015-01-24 20:38:47 -05:00
def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of info_dict's thumbnails.

    When there are no thumbnails, a single informational message is shown
    instead of the table.
    """
    # 'thumbnails' may be absent or None; list(None) would raise TypeError
    # and the "no thumbnails" message below could never be reached.
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2013-11-22 13:57:52 -05:00
2015-02-16 15:44:17 -05:00
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a Language / Name / Formats table for the given subtitles.

    subtitles maps a language to a list of format dicts, each with an 'ext'
    and optionally a 'name'.  When it is empty, only a "has no ..." message
    is shown.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen('Available %s for %s:' % (name, video_id))

    def _row(lang, formats):
        pairs = [(f['ext'], f.get('name') or 'unknown') for f in reversed(formats)]
        exts, names = zip(*pairs)
        if len(set(names)) == 1:
            # One shared name: show it once, or not at all if it is unknown
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    rows = [_row(lang, formats) for lang, formats in subtitles.items()]
    self.to_stdout(render_table(['Language', 'Name', 'Formats'], rows, hideEmpty=True))
2015-02-15 12:03:41 -05:00
2013-11-22 13:57:52 -05:00
def urlopen(self, req):
    """Start an HTTP download.

    req may be a URL string, which is first wrapped with sanitized_Request,
    or an already built request object.  The request is opened through
    self._opener using self._socket_timeout as the timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
2013-11-22 13:57:52 -05:00
def print_debug_header(self):
    """Write the '[debug]' diagnostic header; a no-op unless 'verbose' is set.

    Reports, in order: the encodings in use, the yt-dlp version and how it
    was launched, lazy-loading / plugin / compatibility-option state, the
    git HEAD (best effort), Python and platform details, external program
    versions, available optional libraries and the proxy map.  With the
    'call_home' param set, the public IP address is additionally fetched
    from yt-dl.org and printed.
    """
    if not self.params.get('verbose'):
        return

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    source = (
        '(exe)' if hasattr(sys, 'frozen')
        else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
        else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
        else '')
    self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
    if self.params.get('compat_opts'):
        self._write_string(
            '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))

    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = process_communicate_or_kill(sp)
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        # Deliberately best effort: git may be missing or this may not be a
        # checkout, and the header must still be printed.  The former
        # sys.exc_clear() call only exists on Python 2 and was dropped.
        pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    # Imported here rather than at module level, as in the original code
    from .downloader.websocket import has_websockets
    from .postprocessor.embedthumbnail import has_mutagen
    from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE

    lib_str = ', '.join(sorted(filter(None, (
        compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
        has_websockets and 'websockets',
        has_mutagen and 'mutagen',
        SQLITE_AVAILABLE and 'sqlite',
        KEYRING_AVAILABLE and 'keyring',
    )))) or 'none'
    self._write_string('[debug] Optional libraries: %s\n' % lib_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        # NOTE: a latest-version check against yt-dl.org used to follow, but
        # it sat behind an unconditional return and could never execute, so
        # the unreachable code was removed.
2013-12-01 05:42:02 -05:00
def _setup_opener(self):
    """Create the URL opener used for all requests and store it on self.

    Sets self._socket_timeout (600 unless the 'socket_timeout' param is
    given), self.cookiejar (loaded from the 'cookiefile' and
    'cookiesfrombrowser' params) and self._opener.
    """
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An explicitly empty proxy setting means "use no proxy at all"
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')

    file_handler.file_open = file_open

    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
2014-03-30 00:02:41 -04:00
def encode(self, s):
    """Encode s with the configured encoding; bytes are returned unchanged.

    A UnicodeEncodeError is re-raised with a hint about the --encoding
    option appended to its reason.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
2015-01-24 21:11:12 -05:00
2021-02-03 08:36:09 -05:00
def _write_thumbnails ( self , info_dict , filename ) : # return the extensions
2021-02-09 12:42:32 -05:00
write_all = self . params . get ( ' write_all_thumbnails ' , False )
thumbnails = [ ]
if write_all or self . params . get ( ' writethumbnail ' , False ) :
2021-01-23 07:18:12 -05:00
thumbnails = info_dict . get ( ' thumbnails ' ) or [ ]
2021-02-09 12:42:32 -05:00
multiple = write_all and len ( thumbnails ) > 1
2015-01-24 21:11:12 -05:00
2021-01-23 07:18:12 -05:00
ret = [ ]
2021-06-26 22:05:58 -04:00
for t in thumbnails [ : : - 1 ] :
2015-01-24 21:11:12 -05:00
thumb_ext = determine_ext ( t [ ' url ' ] , ' jpg ' )
2021-02-09 12:42:32 -05:00
suffix = ' %s . ' % t [ ' id ' ] if multiple else ' '
thumb_display_id = ' %s ' % t [ ' id ' ] if multiple else ' '
2021-05-28 16:08:02 -04:00
thumb_filename = replace_extension ( filename , suffix + thumb_ext , info_dict . get ( ' ext ' ) )
2015-01-24 21:11:12 -05:00
2019-10-13 12:00:48 -04:00
if not self . params . get ( ' overwrites ' , True ) and os . path . exists ( encodeFilename ( thumb_filename ) ) :
2021-02-03 08:36:09 -05:00
ret . append ( suffix + thumb_ext )
2021-06-11 08:07:02 -04:00
t [ ' filepath ' ] = thumb_filename
2015-01-24 21:11:12 -05:00
self . to_screen ( ' [ %s ] %s : Thumbnail %s is already present ' %
( info_dict [ ' extractor ' ] , info_dict [ ' id ' ] , thumb_display_id ) )
else :
2021-02-28 19:09:50 -05:00
self . to_screen ( ' [ %s ] %s : Downloading thumbnail %s ... ' %
2015-01-24 21:11:12 -05:00
( info_dict [ ' extractor ' ] , info_dict [ ' id ' ] , thumb_display_id ) )
try :
uf = self . urlopen ( t [ ' url ' ] )
2015-08-30 16:01:13 -04:00
with open ( encodeFilename ( thumb_filename ) , ' wb ' ) as thumbf :
2015-01-24 21:11:12 -05:00
shutil . copyfileobj ( uf , thumbf )
2021-02-03 08:36:09 -05:00
ret . append ( suffix + thumb_ext )
2015-01-24 21:11:12 -05:00
self . to_screen ( ' [ %s ] %s : Writing thumbnail %s to: %s ' %
( info_dict [ ' extractor ' ] , info_dict [ ' id ' ] , thumb_display_id , thumb_filename ) )
2021-05-28 16:08:02 -04:00
t [ ' filepath ' ] = thumb_filename
2021-05-04 13:06:18 -04:00
except network_exceptions as err :
2015-01-24 21:11:12 -05:00
self . report_warning ( ' Unable to download thumbnail " %s " : %s ' %
2015-12-19 20:00:39 -05:00
( t [ ' url ' ] , error_to_compat_str ( err ) ) )
2021-02-09 12:42:32 -05:00
if ret and not write_all :
break
2021-01-23 07:18:12 -05:00
return ret