Add --list-extractor-descriptions (human-readable list of IEs)

2025-04-04 17:54:20 +00:00 · 2013-07-01 18:52:19 +02:00 · 2013-07-01 18:52:19 +02:00 · 0f81866329
commit 0f81866329
parent 2db67bc0f4
9 changed files with 34 additions and 22 deletions
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -35,6 +35,7 @@
 import getpass
 import optparse
 import os
+import random
 import re
 import shlex
 import socket
@ -142,6 +143,9 @@ def _find_term_columns():
    general.add_option('--list-extractors',
            action='store_true', dest='list_extractors',
            help='List all supported extractors and the URLs they would handle', default=False)
+    general.add_option('--list-extractor-descriptions',
+            action='store_true', dest='list_extractor_descriptions',
+            help='Output descriptions of all supported extractors', default=False)
    general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
    general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')

@ -427,6 +431,18 @@ def _real_main(argv=None):
            for mu in matchedUrls:
                compat_print(u'  ' + mu)
        sys.exit(0)
+    if opts.list_extractor_descriptions:
+        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
+            if not ie._WORKING:
+                continue
+            desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
+            if hasattr(ie, 'SEARCH_KEY'):
+                _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise')
+                _COUNTS = (u'', u'5', u'10', u'all')
+                desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
+            compat_print(desc)
+        sys.exit(0)
+

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@ -12,8 +12,7 @@


 class ComedyCentralIE(InfoExtractor):
-    """Information extractor for The Daily Show and Colbert Report """
-
+    IE_DESC = u'The Daily Show / Colbert Report'
    # urls can be abbreviations like :thedailyshow or :colbert
    # urls for episodes like:
    # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -263,3 +263,7 @@ def _real_extract(self, query):
    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        raise NotImplementedError("This method must be implemented by sublclasses")
+
+    @property
+    def SEARCH_KEY(self):
+        return self._SEARCH_KEY
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -11,8 +11,7 @@
 )

 class GenericIE(InfoExtractor):
-    """Generic last-resort information extractor."""
-
+    IE_DESC = u'Generic downloader that works on some sites'
    _VALID_URL = r'.*'
    IE_NAME = u'generic'
    _TEST = {
--- a/youtube_dl/extractor/googleplus.py
+++ b/youtube_dl/extractor/googleplus.py
@ -10,8 +10,7 @@


 class GooglePlusIE(InfoExtractor):
-    """Information extractor for plus.google.com."""
-
+    IE_DESC = u'Google Plus'
    _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
    IE_NAME = u'plus.google'
    _TEST = {
--- a/youtube_dl/extractor/googlesearch.py
+++ b/youtube_dl/extractor/googlesearch.py
@ -8,7 +8,7 @@


 class GoogleSearchIE(SearchInfoExtractor):
-    """Information Extractor for Google Video search queries."""
+    IE_DESC = u'Google Video search'
    _MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
    _MAX_RESULTS = 1000
    IE_NAME = u'video.google:search'
--- a/youtube_dl/extractor/stanfordoc.py
+++ b/youtube_dl/extractor/stanfordoc.py
@ -16,10 +16,9 @@


 class StanfordOpenClassroomIE(InfoExtractor):
-    """Information extractor for Stanford's Open ClassRoom"""
-
-    _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
    IE_NAME = u'stanfordoc'
+    IE_DESC = u'Stanford Open ClassRoom'
+    _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
    _TEST = {
        u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
        u'file': u'PracticalUnix_intro-environment.mp4',
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@ -11,7 +11,7 @@
 )

 class YahooIE(InfoExtractor):
-    """Information extractor for screen.yahoo.com."""
+    IE_DESC = u'Yahoo screen'
    _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
    _TEST = {
        u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@ -88,8 +88,7 @@ def _real_extract(self, url):
        return info_dict

 class YahooSearchIE(SearchInfoExtractor):
-    """Information Extractor for Yahoo! Video search queries."""
-
+    IE_DESC = u'Yahoo screen search'
    _MAX_RESULTS = 1000
    IE_NAME = u'screen.yahoo:search'
    _SEARCH_KEY = 'yvsearch'
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -23,8 +23,7 @@


 class YoutubeIE(InfoExtractor):
-    """Information extractor for youtube.com."""
-
+    IE_DESC = u'YouTube.com'
    _VALID_URL = r"""^
                     (
                         (?:https?://)?                                       # http(s):// (optional)
@ -629,8 +628,7 @@ def _real_extract(self, url):
        return results

 class YoutubePlaylistIE(InfoExtractor):
-    """Information Extractor for YouTube playlists."""
-
+    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
@ -697,8 +695,7 @@ def _real_extract(self, url):


 class YoutubeChannelIE(InfoExtractor):
-    """Information Extractor for YouTube channels."""
-
+    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
@ -756,8 +753,7 @@ def _real_extract(self, url):


 class YoutubeUserIE(InfoExtractor):
-    """Information Extractor for YouTube users."""
-
+    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
@ -813,7 +809,7 @@ def _real_extract(self, url):
        return [self.playlist_result(url_results, playlist_title = username)]

 class YoutubeSearchIE(SearchInfoExtractor):
-    """Information Extractor for YouTube search queries."""
+    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
@ -856,6 +852,7 @@ def _get_n_results(self, query, n):


 class YoutubeShowIE(InfoExtractor):
+    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'