Allow using a custom format selector through API

Closes #1619, #1464
This commit is contained in:
pukkandan 2021-11-10 21:41:41 +05:30
parent 44bcb8d122
commit 093a17107e
No known key found for this signature in database
GPG key ID: 0F00D95A001F4698
2 changed files with 53 additions and 11 deletions

View file

@ -1600,14 +1600,14 @@ # EMBEDDING YT-DLP
```python
from yt_dlp import YoutubeDL
ydl_opts = {}
ydl_opts = {'format': 'bestaudio'}
with YoutubeDL(ydl_opts) as ydl:
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L154-L452).
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), converts the video to an mp3 file, implements a custom postprocessor and prints the final info_dict as json:
Here's a more complete example demonstrating various functionality:
```python
import json
@ -1633,23 +1633,56 @@ # EMBEDDING YT-DLP
print(msg)
# See the docstring of yt_dlp.postprocessor.common.PostProcessor
class MyCustomPP(yt_dlp.postprocessor.PostProcessor):
# See docstring of yt_dlp.postprocessor.common.PostProcessor.run
def run(self, info):
self.to_screen('Doing stuff')
return [], info
# See "progress_hooks" in the docstring of yt_dlp.YoutubeDL
def my_hook(d):
if d['status'] == 'finished':
print('Done downloading, now converting ...')
def format_selector(ctx):
""" Select the best video and the best audio that won't result in an mkv.
This is just an example and does not handle all cases """
# formats are already sorted worst to best
formats = ctx.get('formats')[::-1]
# acodec='none' means there is no audio
best_video = next(f for f in formats
if f['vcodec'] != 'none' and f['acodec'] == 'none')
# find compatible audio extension
audio_ext = {'mp4': 'm4a', 'webm': 'webm'}[best_video['ext']]
# vcodec='none' means there is no video
best_audio = next(f for f in formats if (
f['acodec'] != 'none' and f['vcodec'] == 'none' and f['ext'] == audio_ext))
yield {
# These are the minimum required fields for a merged format
'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}',
'ext': best_video['ext'],
'requested_formats': [best_video, best_audio],
# Must be + seperated list of protocols
'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}'
}
# See docstring of yt_dlp.YoutubeDL for a description of the options
ydl_opts = {
'format': 'bestaudio/best',
'format': format_selector,
'postprocessors': [{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3',
'preferredquality': '192',
# Embed metadata in video using ffmpeg.
# See yt_dlp.postprocessor.FFmpegMetadataPP for the arguments it accepts
'key': 'FFmpegMetadata',
'add_chapters': True,
'add_metadata': True,
}],
'logger': MyLogger(),
'progress_hooks': [my_hook],
@ -1659,14 +1692,16 @@ # EMBEDDING YT-DLP
# Add custom headers
yt_dlp.utils.std_headers.update({'Referer': 'https://www.google.com'})
# See the public functions in yt_dlp.YoutubeDL for for other available functions.
# Eg: "ydl.download", "ydl.download_with_info_file"
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
ydl.add_post_processor(MyCustomPP())
info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc')
# ydl.sanitize_info makes the info json-serializable
print(json.dumps(ydl.sanitize_info(info)))
```
See the public functions in [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py) for other available functions. Eg: `ydl.download`, `ydl.download_with_info_file`
**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above

View file

@ -211,6 +211,9 @@ class YoutubeDL(object):
simulate: Do not download the video files. If unset (or None),
simulate only if listsubtitles, listformats or list_thumbnails is used
format: Video format code. see "FORMAT SELECTION" for more details.
You can also pass a function. The function takes 'ctx' as
argument and returns the formats to download.
See "build_format_selector" for an implementation
allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
ignore_no_formats_error: Ignore "No video formats" error. Usefull for
extracting metadata even if the video is not actually
@ -613,6 +616,7 @@ def check_deprecated(param, option, suggestion):
# Creating format selector here allows us to catch syntax errors before the extraction
self.format_selector = (
None if self.params.get('format') is None
else self.params['format'] if callable(self.params['format'])
else self.build_format_selector(self.params['format']))
self._setup_opener()
@ -1927,9 +1931,9 @@ def _merge(formats_pair):
'format_id': '+'.join(filtered('format_id')),
'ext': output_ext,
'protocol': '+'.join(map(determine_protocol, formats_info)),
'language': '+'.join(orderedSet(filtered('language'))),
'format_note': '+'.join(orderedSet(filtered('format_note'))),
'filesize_approx': sum(filtered('filesize', 'filesize_approx')),
'language': '+'.join(orderedSet(filtered('language'))) or None,
'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
'tbr': sum(filtered('tbr', 'vbr', 'abr')),
}
@ -2357,6 +2361,9 @@ def is_wellformed(f):
info_dict, _ = self.pre_process(info_dict)
# The pre-processors may have modified the formats
formats = info_dict.get('formats', [info_dict])
if self.params.get('list_thumbnails'):
self.list_thumbnails(info_dict)
if self.params.get('listformats'):