Update to ytdl-2021.02.04.1 except youtube

2024-11-07 20:30:41 -05:00 · 2021-02-04 13:26:01 +05:30 · 2021-02-04 13:26:01 +05:30 · 2181983a0c
commit 2181983a0c
parent e29663c644
24 changed files with 663 additions and 434 deletions
--- a/youtube_dlc/extractor/abcnews.py
+++ b/youtube_dlc/extractor/abcnews.py
@ -1,14 +1,15 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import calendar
 import re
 import time
 from .amp import AMPIE
 from .common import InfoExtractor
-from .youtube import YoutubeIE
+from ..utils import (
-from ..compat import compat_urlparse
+    parse_duration,
    parse_iso8601,
    try_get,
 )
 class AbcNewsVideoIE(AMPIE):
@ -18,8 +19,8 @@ class AbcNewsVideoIE(AMPIE):
                        (?:
                            abcnews\.go\.com/
                            (?:
-                                [^/]+/video/(?P<display_id>[0-9a-z-]+)-|
+                                (?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-|
-                                video/embed\?.*?\bid=
+                                video/(?:embed|itemfeed)\?.*?\bid=
                            )|
                            fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
                        )
@ -36,6 +37,8 @@ class AbcNewsVideoIE(AMPIE):
            'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
            'duration': 180,
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1380454200,
            'upload_date': '20130929',
        },
        'params': {
            # m3u8 download
@ -47,6 +50,12 @@ class AbcNewsVideoIE(AMPIE):
    }, {
        'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
        'only_matching': True,
    }, {
        'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
        'only_matching': True,
    }, {
        'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
        'only_matching': True,
    }]
    def _real_extract(self, url):
@ -67,28 +76,23 @@ class AbcNewsIE(InfoExtractor):
    _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
    _TESTS = [{
-        'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
+        # Youtube Embeds
        'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
        'info_dict': {
-            'id': '10505354',
+            'id': '51286501',
-            'ext': 'flv',
+            'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
-            'display_id': 'dramatic-video-rare-death-job-america',
+            'description': 'Billingsley went from a child actor to Hollywood power player.',
            'title': 'Occupational Hazards',
            'description': 'Nightline investigates the dangers that lurk at various jobs.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20100428',
            'timestamp': 1272412800,
        },
-        'add_ie': ['AbcNewsVideo'],
+        'playlist_count': 5,
    }, {
        'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
        'info_dict': {
            'id': '38897857',
            'ext': 'mp4',
            'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
            'title': 'Justin Timberlake Drops Hints For Secret Single',
            'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
-            'upload_date': '20160515',
+            'upload_date': '20160505',
-            'timestamp': 1463329500,
+            'timestamp': 1462442280,
        },
        'params': {
            # m3u8 download
@ -100,49 +104,55 @@ class AbcNewsIE(InfoExtractor):
    }, {
        'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
        'only_matching': True,
    }, {
        # inline.type == 'video'
        'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
        'only_matching': True,
    }]
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        story_id = self._match_id(url)
-        display_id = mobj.group('display_id')
+        webpage = self._download_webpage(url, story_id)
-        video_id = mobj.group('id')
+        story = self._parse_json(self._search_regex(
            r"window\['__abcnews__'\]\s*=\s*({.+?});",
            webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0]
        article_contents = story.get('articleContents') or {}
-        webpage = self._download_webpage(url, video_id)
+        def entries():
-        video_url = self._search_regex(
+            featured_video = story.get('featuredVideo') or {}
-            r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
+            feed = try_get(featured_video, lambda x: x['video']['feed'])
-        full_video_url = compat_urlparse.urljoin(url, video_url)
+            if feed:
                yield {
                    '_type': 'url',
                    'id': featured_video.get('id'),
                    'title': featured_video.get('name'),
                    'url': feed,
                    'thumbnail': featured_video.get('images'),
                    'description': featured_video.get('description'),
                    'timestamp': parse_iso8601(featured_video.get('uploadDate')),
                    'duration': parse_duration(featured_video.get('duration')),
                    'ie_key': AbcNewsVideoIE.ie_key(),
                }
-        youtube_url = YoutubeIE._extract_url(webpage)
+            for inline in (article_contents.get('inlines') or []):
                inline_type = inline.get('type')
                if inline_type == 'iframe':
                    iframe_url = try_get(inline, lambda x: x['attrs']['src'])
                    if iframe_url:
                        yield self.url_result(iframe_url)
                elif inline_type == 'video':
                    video_id = inline.get('id')
                    if video_id:
                        yield {
                            '_type': 'url',
                            'id': video_id,
                            'url': 'http://abcnews.go.com/video/embed?id=' + video_id,
                            'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'),
                            'description': inline.get('description'),
                            'duration': parse_duration(inline.get('duration')),
                            'ie_key': AbcNewsVideoIE.ie_key(),
                        }
-        timestamp = None
+        return self.playlist_result(
-        date_str = self._html_search_regex(
+            entries(), story_id, article_contents.get('headline'),
-            r'<span[^>]+class="timestamp">([^<]+)</span>',
+            article_contents.get('subHead'))
            webpage, 'timestamp', fatal=False)
        if date_str:
            tz_offset = 0
            if date_str.endswith(' ET'):  # Eastern Time
                tz_offset = -5
                date_str = date_str[:-3]
            date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
            for date_format in date_formats:
                try:
                    timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
                except ValueError:
                    continue
            if timestamp is not None:
                timestamp -= tz_offset * 3600
        entry = {
            '_type': 'url_transparent',
            'ie_key': AbcNewsVideoIE.ie_key(),
            'url': full_video_url,
            'id': video_id,
            'display_id': display_id,
            'timestamp': timestamp,
        }
        if youtube_url:
            entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
            return self.playlist_result(entries)
        return entry
--- a/youtube_dlc/extractor/adn.py
+++ b/youtube_dlc/extractor/adn.py
@ -26,6 +26,7 @@
    strip_or_none,
    try_get,
    unified_strdate,
    urlencode_postdata,
 )
@ -51,9 +52,12 @@ class ADNIE(InfoExtractor):
        }
    }
    _NETRC_MACHINE = 'animedigitalnetwork'
    _BASE_URL = 'http://animedigitalnetwork.fr'
    _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
    _PLAYER_BASE_URL = _API_BASE_URL + 'player/'
    _HEADERS = {}
    _LOGIN_ERR_MESSAGE = 'Unable to log in'
    _RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
    _POS_ALIGN_MAP = {
        'start': 1,
@ -129,19 +133,42 @@ def _get_subtitles(self, sub_url, video_id):
            }])
        return subtitles
    def _real_initialize(self):
        """Log in with the configured credentials, if any, and keep the token.

        When a username is available, it is POSTed together with the password
        to the ``authentication/login`` endpoint under ``_API_BASE_URL``. If
        the JSON response carries an ``accessToken``, ``self._HEADERS`` is set
        to a bearer ``authorization`` header. An ``ExtractorError`` raised
        during login is reported as a warning instead of being propagated.
        """
        username, password = self._get_login_info()
        # No credentials supplied: nothing to do.
        if not username:
            return
        try:
            # ``or {}`` lets ``.get`` work even when the request returns a
            # falsy value.
            # NOTE(review): the request is made with fatal=False; whether an
            # ExtractorError can still reach the handler below depends on
            # _download_json -- confirm.
            access_token = (self._download_json(
                self._API_BASE_URL + 'authentication/login', None,
                'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
                data=urlencode_postdata({
                    'password': password,
                    'rememberMe': False,
                    'source': 'Web',
                    'username': username,
                })) or {}).get('accessToken')
            if access_token:
                # Assigned on the instance; only done when a token was returned.
                self._HEADERS = {'authorization': 'Bearer ' + access_token}
        except ExtractorError as e:
            message = None
            # On HTTP 401, try to take a server-provided 'message' (or 'code')
            # from the JSON error body.
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                resp = self._parse_json(
                    e.cause.read().decode(), None, fatal=False) or {}
                message = resp.get('message') or resp.get('code')
            # Fall back to the generic login error text when no message was found.
            self.report_warning(message or self._LOGIN_ERR_MESSAGE)
    def _real_extract(self, url):
        video_id = self._match_id(url)
        video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
        player = self._download_json(
            video_base_url + 'configuration', video_id,
-            'Downloading player config JSON metadata')['player']
+            'Downloading player config JSON metadata',
            headers=self._HEADERS)['player']
        options = player['options']
        user = options['user']
        if not user.get('hasAccess'):
-            raise ExtractorError(
+            self.raise_login_required()
                'This video is only available for paying users', expected=True)
            # self.raise_login_required() # FIXME: Login is not implemented
        token = self._download_json(
            user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
@ -188,8 +215,7 @@ def _real_extract(self, url):
                message = error.get('message')
                if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
                    self.raise_geo_restricted(msg=message)
-                else:
+                raise ExtractorError(message)
                    raise ExtractorError(message)
        else:
            raise ExtractorError('Giving up retrying')
--- a/youtube_dlc/extractor/aenetworks.py
+++ b/youtube_dlc/extractor/aenetworks.py
@ -252,7 +252,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
    _TESTS = [{
        'url': 'http://www.history.com/shows/ancient-aliens',
        'info_dict': {
-            'id': 'SH012427480000',
+            'id': 'SERIES1574',
            'title': 'Ancient Aliens',
            'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
        },
--- a/youtube_dlc/extractor/amp.py
+++ b/youtube_dlc/extractor/amp.py
@ -8,6 +8,7 @@
    int_or_none,
    mimetype2ext,
    parse_iso8601,
    unified_timestamp,
    url_or_none,
 )
@ -88,7 +89,7 @@ def get_media_node(name, default=None):
        self._sort_formats(formats)
-        timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
+        timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
        return {
            'id': video_id,
--- a/youtube_dlc/extractor/awaan.py
+++ b/youtube_dlc/extractor/awaan.py
@ -48,6 +48,7 @@ def _parse_video_data(self, video_data, video_id, is_live):
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
            'is_live': is_live,
            'uploader_id': video_data.get('user_id'),
        }
@ -107,6 +108,7 @@ class AWAANLiveIE(AWAANBaseIE):
            'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'upload_date': '20150107',
            'timestamp': 1420588800,
            'uploader_id': '71',
        },
        'params': {
            # m3u8 download
--- a/youtube_dlc/extractor/azmedien.py
+++ b/youtube_dlc/extractor/azmedien.py
@ -47,7 +47,7 @@ class AZMedienIE(InfoExtractor):
        'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
        'only_matching': True
    }]
-    _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d'
+    _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
    _PARTNER_ID = '1719221'
    def _real_extract(self, url):
--- a/youtube_dlc/extractor/bleacherreport.py
+++ b/youtube_dlc/extractor/bleacherreport.py
@ -90,13 +90,19 @@ class BleacherReportCMSIE(AMPIE):
    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
    _TESTS = [{
        'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
-        'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
+        'md5': '670b2d73f48549da032861130488c681',
        'info_dict': {
            'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
            'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
            'upload_date': '20150723',
            'timestamp': 1437679032,
        },
        'expected_warnings': [
            'Unable to download f4m manifest'
        ]
    }]
    def _real_extract(self, url):
--- a/youtube_dlc/extractor/bravotv.py
+++ b/youtube_dlc/extractor/bravotv.py
@ -12,7 +12,7 @@
 class BravoTVIE(AdobePassIE):
-    _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
        'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
@ -28,10 +28,13 @@ class BravoTVIE(AdobePassIE):
    }, {
        'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
        'only_matching': True,
    }, {
        'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
        'only_matching': True,
    }]
    def _real_extract(self, url):
-        display_id = self._match_id(url)
+        site, display_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, display_id)
        settings = self._parse_json(self._search_regex(
            r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
@ -53,11 +56,14 @@ def _real_extract(self, url):
                tp_path = release_pid = tve['release_pid']
            if tve.get('entitlement') == 'auth':
                adobe_pass = settings.get('tve_adobe_auth', {})
                if site == 'bravotv':
                    site = 'bravo'
                resource = self._get_mvpd_resource(
-                    adobe_pass.get('adobePassResourceId', 'bravo'),
+                    adobe_pass.get('adobePassResourceId') or site,
                    tve['title'], release_pid, tve.get('rating'))
                query['auth'] = self._extract_mvpd_auth(
-                    url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
+                    url, release_pid,
                    adobe_pass.get('adobePassRequestorId') or site, resource)
        else:
            shared_playlist = settings['ls_playlist']
            account_pid = shared_playlist['account_pid']
--- a/youtube_dlc/extractor/ccma.py
+++ b/youtube_dlc/extractor/ccma.py
@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import datetime
 import re
 from .common import InfoExtractor
@ -8,8 +9,8 @@
    clean_html,
    int_or_none,
    parse_duration,
    parse_iso8601,
    parse_resolution,
    try_get,
    url_or_none,
 )
@ -24,8 +25,9 @@ class CCMAIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'L\'espot de La Marató de TV3',
            'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
-            'timestamp': 1470918540,
+            'timestamp': 1478608140,
-            'upload_date': '20160811',
+            'upload_date': '20161108',
            'age_limit': 0,
        }
    }, {
        'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
@ -35,8 +37,24 @@ class CCMAIE(InfoExtractor):
            'ext': 'mp3',
            'title': 'El Consell de Savis analitza el derbi',
            'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
-            'upload_date': '20171205',
+            'upload_date': '20170512',
-            'timestamp': 1512507300,
+            'timestamp': 1494622500,
            'vcodec': 'none',
            'categories': ['Esports'],
        }
    }, {
        'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
        'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
        'info_dict': {
            'id': '6031387',
            'ext': 'mp4',
            'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
            'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
            'timestamp': 1582577700,
            'upload_date': '20200224',
            'subtitles': 'mincount:4',
            'age_limit': 16,
            'series': 'Crims',
        }
    }]
@ -72,17 +90,27 @@ def _real_extract(self, url):
        informacio = media['informacio']
        title = informacio['titol']
-        durada = informacio.get('durada', {})
+        durada = informacio.get('durada') or {}
        duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
-        timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))
+        tematica = try_get(informacio, lambda x: x['tematica']['text'])
        timestamp = None
        data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
        try:
            timestamp = datetime.datetime.strptime(
                data_utc, '%Y-%d-%mT%H:%M:%S%z').timestamp()
        except TypeError:
            pass
        subtitles = {}
-        subtitols = media.get('subtitols', {})
+        subtitols = media.get('subtitols') or []
-        if subtitols:
+        if isinstance(subtitols, dict):
-            sub_url = subtitols.get('url')
+            subtitols = [subtitols]
        for st in subtitols:
            sub_url = st.get('url')
            if sub_url:
                subtitles.setdefault(
-                    subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
+                    st.get('iso') or st.get('text') or 'ca', []).append({
                        'url': sub_url,
                    })
@ -97,6 +125,16 @@ def _real_extract(self, url):
                    'height': int_or_none(imatges.get('alcada')),
                }]
        age_limit = None
        codi_etic = try_get(informacio, lambda x: x['codi_etic']['id'])
        if codi_etic:
            codi_etic_s = codi_etic.split('_')
            if len(codi_etic_s) == 2:
                if codi_etic_s[1] == 'TP':
                    age_limit = 0
                else:
                    age_limit = int_or_none(codi_etic_s[1])
        return {
            'id': media_id,
            'title': title,
@ -106,4 +144,9 @@ def _real_extract(self, url):
            'thumbnails': thumbnails,
            'subtitles': subtitles,
            'formats': formats,
            'age_limit': age_limit,
            'alt_title': informacio.get('titol_complet'),
            'episode_number': int_or_none(informacio.get('capitol')),
            'categories': [tematica] if tematica else None,
            'series': informacio.get('programa'),
        }
--- a/youtube_dlc/extractor/cda.py
+++ b/youtube_dlc/extractor/cda.py
@ -96,7 +96,7 @@ def _real_extract(self, url):
            raise ExtractorError('This video is only available for premium users.', expected=True)
        need_confirm_age = False
-        if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")',
+        if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
                                   webpage, 'birthday validate form', default=None):
            webpage = self._download_age_confirm_page(
                url, video_id, note='Confirming age')
--- a/youtube_dlc/extractor/egghead.py
+++ b/youtube_dlc/extractor/egghead.py
@ -12,7 +12,14 @@
 )
-class EggheadCourseIE(InfoExtractor):
+class EggheadBaseIE(InfoExtractor):
    def _call_api(self, path, video_id, resource, fatal=True):
        """Download JSON from ``https://app.egghead.io/api/v1/`` + ``path``.

        ``video_id`` is passed through to ``_download_json``; ``resource`` is
        interpolated into the progress note ``'Downloading %s JSON'``; and
        ``fatal`` is forwarded unchanged. Returns whatever ``_download_json``
        returns.
        """
        return self._download_json(
            'https://app.egghead.io/api/v1/' + path,
            video_id, 'Downloading %s JSON' % resource, fatal=fatal)
 class EggheadCourseIE(EggheadBaseIE):
    IE_DESC = 'egghead.io course'
    IE_NAME = 'egghead:course'
    _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
@ -28,10 +35,9 @@ class EggheadCourseIE(InfoExtractor):
    def _real_extract(self, url):
        playlist_id = self._match_id(url)
-
+        series_path = 'series/' + playlist_id
-        lessons = self._download_json(
+        lessons = self._call_api(
-            'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
+            series_path + '/lessons', playlist_id, 'course lessons')
            playlist_id, 'Downloading course lessons JSON')
        entries = []
        for lesson in lessons:
@ -44,9 +50,8 @@ def _real_extract(self, url):
            entries.append(self.url_result(
                lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
-        course = self._download_json(
+        course = self._call_api(
-            'https://egghead.io/api/v1/series/%s' % playlist_id,
+            series_path, playlist_id, 'course', False) or {}
            playlist_id, 'Downloading course JSON', fatal=False) or {}
        playlist_id = course.get('id')
        if playlist_id:
@ -57,7 +62,7 @@ def _real_extract(self, url):
            course.get('description'))
-class EggheadLessonIE(InfoExtractor):
+class EggheadLessonIE(EggheadBaseIE):
    IE_DESC = 'egghead.io lesson'
    IE_NAME = 'egghead:lesson'
    _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
@ -74,7 +79,7 @@ class EggheadLessonIE(InfoExtractor):
            'upload_date': '20161209',
            'duration': 304,
            'view_count': 0,
-            'tags': ['javascript', 'free'],
+            'tags': 'count:2',
        },
        'params': {
            'skip_download': True,
@ -88,8 +93,8 @@ class EggheadLessonIE(InfoExtractor):
    def _real_extract(self, url):
        display_id = self._match_id(url)
-        lesson = self._download_json(
+        lesson = self._call_api(
-            'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
+            'lessons/' + display_id, display_id, 'lesson')
        lesson_id = compat_str(lesson['id'])
        title = lesson['title']
--- a/youtube_dlc/extractor/extractors.py
+++ b/youtube_dlc/extractor/extractors.py
@ -1308,6 +1308,7 @@
    TV2IE,
    TV2ArticleIE,
    KatsomoIE,
    MTVUutisetArticleIE,
 )
 from .tv2dk import (
    TV2DKIE,
@ -1448,7 +1449,6 @@
    VidmeUserIE,
    VidmeUserLikesIE,
 )
 from .vidzi import VidziIE
 from .vier import VierIE, VierVideosIE
 from .viewlift import (
    ViewLiftIE,
@ -1508,6 +1508,7 @@
    VRVSeriesIE,
 )
 from .vshare import VShareIE
 from .vtm import VTMIE
 from .medialaan import MedialaanIE
 from .vube import VubeIE
 from .vuclip import VuClipIE
--- a/youtube_dlc/extractor/generic.py
+++ b/youtube_dlc/extractor/generic.py
@ -131,6 +131,7 @@
 from .rcs import RCSEmbedsIE
 from .bitchute import BitChuteIE
 from .arcpublishing import ArcPublishingIE
 from .medialaan import MedialaanIE
 class GenericIE(InfoExtractor):
@ -2224,6 +2225,20 @@ class GenericIE(InfoExtractor):
                'duration': 1581,
            },
        },
        {
            # MyChannels SDK embed
            # https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
            'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
            'md5': '90c0699c37006ef18e198c032d81739c',
            'info_dict': {
                'id': '194165',
                'ext': 'mp4',
                'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
                'timestamp': 1611740340,
                'upload_date': '20210127',
                'duration': 159,
            },
        },
    ]
    def report_following_redirect(self, new_url):
@ -2463,6 +2478,9 @@ def _real_extract(self, url):
        webpage = self._webpage_read_content(
            full_response, url, video_id, prefix=first_bytes)
        if '<title>DPG Media Privacy Gate</title>' in webpage:
            webpage = self._download_webpage(url, video_id)
        self.report_extraction(video_id)
        # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
@ -2594,6 +2612,11 @@ def _real_extract(self, url):
        if arc_urls:
            return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
        mychannels_urls = MedialaanIE._extract_urls(webpage)
        if mychannels_urls:
            return self.playlist_from_matches(
                mychannels_urls, video_id, video_title, ie=MedialaanIE.ie_key())
        # Look for embedded rtl.nl player
        matches = re.findall(
            r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
--- a/youtube_dlc/extractor/googledrive.py
+++ b/youtube_dlc/extractor/googledrive.py
@ -7,6 +7,7 @@
 from ..utils import (
    determine_ext,
    ExtractorError,
    get_element_by_class,
    int_or_none,
    lowercase_escape,
    try_get,
@ -237,7 +238,7 @@ def add_source_format(urlh):
                if confirmation_webpage:
                    confirm = self._search_regex(
                        r'confirm=([^&"\']+)', confirmation_webpage,
-                        'confirmation code', fatal=False)
+                        'confirmation code', default=None)
                    if confirm:
                        confirmed_source_url = update_url_query(source_url, {
                            'confirm': confirm,
@ -245,6 +246,11 @@ def add_source_format(urlh):
                        urlh = request_source_file(confirmed_source_url, 'confirmed source')
                        if urlh and urlh.headers.get('Content-Disposition'):
                            add_source_format(urlh)
                    else:
                        self.report_warning(
                            get_element_by_class('uc-error-subcaption', confirmation_webpage)
                            or get_element_by_class('uc-error-caption', confirmation_webpage)
                            or 'unable to extract confirmation code')
        if not formats and reason:
            raise ExtractorError(reason, expected=True)
--- a/youtube_dlc/extractor/medialaan.py
+++ b/youtube_dlc/extractor/medialaan.py
@ -2,268 +2,113 @@
 import re
-from .gigya import GigyaBaseIE
+from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
    extract_attributes,
    int_or_none,
-    parse_duration,
+    mimetype2ext,
-    try_get,
+    parse_iso8601,
    unified_timestamp,
 )
-class MedialaanIE(GigyaBaseIE):
+class MedialaanIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.|nieuws\.)?
                        (?:
-                            (?P<site_id>vtm|q2|vtmkzoom)\.be/
+                            (?:embed\.)?mychannels.video/embed/|
-                            (?:
+                            embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
-                                video(?:/[^/]+/id/|/?\?.*?\baid=)|
+                            (?:www\.)?(?:
-                                (?:[^/]+/)*
+                                (?:
-                            )
+                                    7sur7|
                                    demorgen|
                                    hln|
                                    joe|
                                    qmusic
                                )\.be|
                                (?:
                                    [abe]d|
                                    bndestem|
                                    destentor|
                                    gelderlander|
                                    pzc|
                                    tubantia|
                                    volkskrant
                                )\.nl
                            )/video/(?:[^/]+/)*[^/?&#]+~p
                        )
-                        (?P<id>[^/?#&]+)
+                        (?P<id>\d+)
                    '''
    _NETRC_MACHINE = 'medialaan'
    _APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
    _SITE_TO_APP_ID = {
        'vtm': 'vtm_watch',
        'q2': 'q2',
        'vtmkzoom': 'vtmkzoom',
    }
    _TESTS = [{
-        # vod
+        'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
        'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
        'info_dict': {
-            'id': 'vtm_20170219_VM0678361_vtmwatch',
+            'id': '193993',
            'ext': 'mp4',
-            'title': 'Allemaal Chris afl. 6',
+            'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
-            'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
+            'timestamp': 1611663540,
-            'timestamp': 1487533280,
+            'upload_date': '20210126',
-            'upload_date': '20170219',
+            'duration': 238,
            'duration': 2562,
            'series': 'Allemaal Chris',
            'season': 'Allemaal Chris',
            'season_number': 1,
            'season_id': '256936078124527',
            'episode': 'Allemaal Chris afl. 6',
            'episode_number': 6,
            'episode_id': '256936078591527',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires account credentials',
    }, {
-        # clip
+        'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
        'url': 'http://vtm.be/video?aid=168332',
        'info_dict': {
            'id': '168332',
            'ext': 'mp4',
            'title': '"Veronique liegt!"',
            'description': 'md5:1385e2b743923afe54ba4adc38476155',
            'timestamp': 1489002029,
            'upload_date': '20170308',
            'duration': 96,
        },
    }, {
        # vod
        'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
        'only_matching': True,
    }, {
-        # vod
+        'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
        'url': 'http://vtm.be/video?aid=163157',
        'only_matching': True,
    }, {
-        # vod
+        'url': 'https://embed.mychannels.video/script/production/193993',
        'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
        'only_matching': True,
    }, {
-        # clip
+        'url': 'https://embed.mychannels.video/production/193993',
        'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
        'only_matching': True,
    }, {
-        # http/s redirect
+        'url': 'https://mychannels.video/embed/193993',
-        'url': 'https://vtmkzoom.be/video?aid=45724',
+        'only_matching': True,
        'info_dict': {
            'id': '257136373657000',
            'ext': 'mp4',
            'title': 'K3 Dansstudio Ushuaia afl.6',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires account credentials',
    }, {
-        # nieuws.vtm.be
+        'url': 'https://embed.mychannels.video/embed/193993',
        'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
        'only_matching': True,
    }]
-    def _real_initialize(self):
+    @staticmethod
-        self._logged_in = False
+    def _extract_urls(webpage):
-
+        entries = []
-    def _login(self):
+        for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
-        username, password = self._get_login_info()
+            mychannels_id = extract_attributes(element).get('data-mychannels-id')
-        if username is None:
+            if mychannels_id:
-            self.raise_login_required()
+                entries.append('https://mychannels.video/embed/' + mychannels_id)
-
+        return entries
        auth_data = {
            'APIKey': self._APIKEY,
            'sdk': 'js_6.1',
            'format': 'json',
            'loginID': username,
            'password': password,
        }
        auth_info = self._gigya_login(auth_data)
        self._uid = auth_info['UID']
        self._uid_signature = auth_info['UIDSignature']
        self._signature_timestamp = auth_info['signatureTimestamp']
        self._logged_in = True
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        production_id = self._match_id(url)
-        video_id, site_id = mobj.group('id', 'site_id')
+        production = self._download_json(
            'https://embed.mychannels.video/sdk/production/' + production_id,
            production_id, query={'options': 'UUUU_default'})['productions'][0]
        title = production['title']
-        webpage = self._download_webpage(url, video_id)
+        formats = []
-
+        for source in (production.get('sources') or []):
-        config = self._parse_json(
+            src = source.get('src')
-            self._search_regex(
+            if not src:
-                r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);',
+                continue
-                webpage, 'config', default='{}'), video_id,
+            ext = mimetype2ext(source.get('type'))
-            transform_source=lambda s: s.replace(
+            if ext == 'm3u8':
-                '\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'"))
+                formats.extend(self._extract_m3u8_formats(
-
+                    src, production_id, 'mp4', 'm3u8_native',
-        vod_id = config.get('vodId') or self._search_regex(
+                    m3u8_id='hls', fatal=False))
            (r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
             r'"vodId"\s*:\s*"(.+?)"',
             r'<[^>]+id=["\']vod-(\d+)'),
            webpage, 'video_id', default=None)
        # clip, no authentication required
        if not vod_id:
            player = self._parse_json(
                self._search_regex(
                    r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
                    default=''),
                video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
            if player:
                video = player[-1]
                if video['videoUrl'] in ('http', 'https'):
                    return self.url_result(video['url'], MedialaanIE.ie_key())
                info = {
                    'id': video_id,
                    'url': video['videoUrl'],
                    'title': video['title'],
                    'thumbnail': video.get('imageUrl'),
                    'timestamp': int_or_none(video.get('createdDate')),
                    'duration': int_or_none(video.get('duration')),
                }
            else:
-                info = self._parse_html5_media_entries(
+                formats.append({
-                    url, webpage, video_id, m3u8_id='hls')[0]
+                    'ext': ext,
-                info.update({
+                    'url': src,
                    'id': video_id,
                    'title': self._html_search_meta('description', webpage),
                    'duration': parse_duration(self._html_search_meta('duration', webpage)),
                })
-        # vod, authentication required
+        self._sort_formats(formats)
        else:
            if not self._logged_in:
                self._login()
-            settings = self._parse_json(
+        return {
-                self._search_regex(
+            'id': production_id,
-                    r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+            'title': title,
-                    webpage, 'drupal settings', default='{}'),
+            'formats': formats,
-                video_id)
+            'thumbnail': production.get('posterUrl'),
-
+            'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
-            def get(container, item):
+            'duration': int_or_none(production.get('duration')) or None,
-                return try_get(
+        }
                    settings, lambda x: x[container][item],
                    compat_str) or self._search_regex(
                    r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
                    default=None)
            app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
            sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
            data = self._download_json(
                'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
                video_id, query={
                    'app_id': app_id,
                    'user_network': sso,
                    'UID': self._uid,
                    'UIDSignature': self._uid_signature,
                    'signatureTimestamp': self._signature_timestamp,
                })
            formats = self._extract_m3u8_formats(
                data['response']['uri'], video_id, entry_protocol='m3u8_native',
                ext='mp4', m3u8_id='hls')
            self._sort_formats(formats)
            info = {
                'id': vod_id,
                'formats': formats,
            }
            api_key = get('vod', 'apiKey')
            channel = get('medialaanGigya', 'channel')
            if api_key:
                videos = self._download_json(
                    'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
                    query={
                        'channels': channel,
                        'ids': vod_id,
                        'limit': 1,
                        'apikey': api_key,
                    })
                if videos:
                    video = try_get(
                        videos, lambda x: x['response']['videos'][0], dict)
                    if video:
                        def get(container, item, expected_type=None):
                            return try_get(
                                video, lambda x: x[container][item], expected_type)
                        def get_string(container, item):
                            return get(container, item, compat_str)
                        info.update({
                            'series': get_string('program', 'title'),
                            'season': get_string('season', 'title'),
                            'season_number': int_or_none(get('season', 'number')),
                            'season_id': get_string('season', 'id'),
                            'episode': get_string('episode', 'title'),
                            'episode_number': int_or_none(get('episode', 'number')),
                            'episode_id': get_string('episode', 'id'),
                            'duration': int_or_none(
                                video.get('duration')) or int_or_none(
                                video.get('durationMillis'), scale=1000),
                            'title': get_string('episode', 'title'),
                            'description': get_string('episode', 'text'),
                            'timestamp': unified_timestamp(get_string(
                                'publication', 'begin')),
                        })
            if not info.get('title'):
                info['title'] = try_get(
                    config, lambda x: x['videoConfig']['title'],
                    compat_str) or self._html_search_regex(
                    r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
                    default=None) or self._og_search_title(webpage)
        if not info.get('description'):
            info['description'] = self._html_search_regex(
                r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
                webpage, 'description', default=None)
        return info
--- a/youtube_dlc/extractor/pornhub.py
+++ b/youtube_dlc/extractor/pornhub.py
@ -22,11 +22,15 @@
    orderedSet,
    remove_quotes,
    str_to_int,
    update_url_query,
    urlencode_postdata,
    url_or_none,
 )
 class PornHubBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'pornhub'
    def _download_webpage_handle(self, *args, **kwargs):
        def dl(*args, **kwargs):
            return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
@ -52,6 +56,66 @@ def dl(*args, **kwargs):
        return webpage, urlh
    def _real_initialize(self):
        """Start each extractor instance in the logged-out state.

        ``_login`` checks this flag first and returns immediately once it
        has been set to ``True``.
        """
        self._logged_in = False
    def _login(self, host):
        """Authenticate against *host* unless this instance already did.

        Returns silently when the instance is already flagged as logged in
        or when no username is configured for the site. Otherwise the login
        page is fetched; if it already shows a sign-out control the flag is
        simply set, else the hidden form inputs plus the credentials are
        posted to the ``/front/authenticate`` endpoint.

        Raises:
            ExtractorError: when the authenticate response does not report
                ``success == '1'``. The error is marked ``expected`` when
                the response carries a ``message``.
        """
        if self._logged_in:
            return

        # The first label of the host doubles as the netrc machine name.
        machine = host.partition('.')[0]

        # Both sites pornhub and pornhubpremium have separate accounts
        # so there should be an option to provide credentials for both.
        # At the same time some videos are available under the same video id
        # on both sites so that we have to identify them as the same video.
        # For that purpose we have to keep both in the same extractor
        # but under different netrc machines.
        username, password = self._get_login_info(netrc_machine=machine)
        if username is None:
            return

        path_prefix = 'premium/' if 'premium' in host else ''
        login_url = 'https://www.%s/%slogin' % (host, path_prefix)
        login_page = self._download_webpage(
            login_url, None, 'Downloading %s login page' % machine)

        # A sign-out control on the login page means the session (e.g. from
        # cookies) is already authenticated.
        signed_in_markers = (
            r'class=["\']signOut',
            r'>Sign\s+[Oo]ut\s*<',
        )
        if any(re.search(marker, login_page) for marker in signed_in_markers):
            self._logged_in = True
            return

        form_fields = self._hidden_inputs(login_page)
        form_fields['username'] = username
        form_fields['password'] = password

        request_headers = {
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Referer': login_url,
            'X-Requested-With': 'XMLHttpRequest',
        }
        response = self._download_json(
            'https://www.%s/front/authenticate' % host, None,
            'Logging in to %s' % machine,
            data=urlencode_postdata(form_fields),
            headers=request_headers)

        if response.get('success') == '1':
            self._logged_in = True
            return

        error_message = response.get('message')
        if error_message is None:
            raise ExtractorError('Unable to log in')
        raise ExtractorError(
            'Unable to login: %s' % error_message, expected=True)
 class PornHubIE(PornHubBaseIE):
    IE_DESC = 'PornHub and Thumbzilla'
@ -163,12 +227,20 @@ class PornHubIE(PornHubBaseIE):
    }, {
        'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
        'only_matching': True,
    }, {
        # Some videos are available with the same id on both premium
        # and non-premium sites (e.g. this and the following test)
        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
        'only_matching': True,
    }]
    @staticmethod
    def _extract_urls(webpage):
        return re.findall(
-            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net|org)/embed/[\da-z]+)',
+            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
            webpage)
    def _extract_count(self, pattern, webpage, name):
@ -180,12 +252,7 @@ def _real_extract(self, url):
        host = mobj.group('host') or 'pornhub.com'
        video_id = mobj.group('id')
-        if 'premium' in host:
+        self._login(host)
            if not self._downloader.params.get('cookiefile'):
                raise ExtractorError(
                    'PornHub Premium requires authentication.'
                    ' You may want to use --cookies.',
                    expected=True)
        self._set_cookie(host, 'age_verified', '1')
@ -405,6 +472,10 @@ def extract_list(meta_key):
 class PornHubPlaylistBaseIE(PornHubBaseIE):
    def _extract_page(self, url):
        """Return the ``page=`` number found in *url*, passed through ``int_or_none``.

        The regex search is given ``default=None``, so a URL without a
        ``page=<digits>`` component hands ``None`` to ``int_or_none``.
        """
        page_str = self._search_regex(
            r'\bpage=(\d+)', url, 'page', default=None)
        return int_or_none(page_str)
    def _extract_entries(self, webpage, host):
        # Only process container div with main playlist content skipping
        # drop-down menu that uses similar pattern for videos (see
@ -422,26 +493,6 @@ def _extract_entries(self, webpage, host):
                container))
        ]
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        host = mobj.group('host')
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(url, playlist_id)
        entries = self._extract_entries(webpage, host)
        playlist = self._parse_json(
            self._search_regex(
                r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
                'playlist', default='{}'),
            playlist_id, fatal=False)
        title = playlist.get('title') or self._search_regex(
            r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
        return self.playlist_result(
            entries, playlist_id, title, playlist.get('description'))
 class PornHubUserIE(PornHubPlaylistBaseIE):
    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
@ -463,14 +514,27 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
    }, {
        'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
        'only_matching': True,
    }, {
        # Unavailable via /videos page, but available with direct pagination
        # on pornstar page (see [1]), requires premium
        # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
        'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
        'only_matching': True,
    }, {
        # Same as before, multi page
        'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        user_id = mobj.group('id')
        videos_url = '%s/videos' % mobj.group('url')
        page = self._extract_page(url)
        if page:
            videos_url = update_url_query(videos_url, {'page': page})
        return self.url_result(
-            '%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(),
+            videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id)
            video_id=user_id)
 class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
@ -483,32 +547,55 @@ def _has_more(webpage):
                <button[^>]+\bid=["\']moreDataBtn
            ''', webpage) is not None
-    def _real_extract(self, url):
+    def _entries(self, url, host, item_id):
-        mobj = re.match(self._VALID_URL, url)
+        page = self._extract_page(url)
        host = mobj.group('host')
        item_id = mobj.group('id')
-        page = int_or_none(self._search_regex(
+        VIDEOS = '/videos'
            r'\bpage=(\d+)', url, 'page', default=None))
-        entries = []
+        def download_page(base_url, num, fallback=False):
-        for page_num in (page, ) if page is not None else itertools.count(1):
+            note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '')
            return self._download_webpage(
                base_url, item_id, note, query={'page': num})
        def is_404(e):
            return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
        base_url = url
        has_page = page is not None
        first_page = page if has_page else 1
        for page_num in (first_page, ) if has_page else itertools.count(first_page):
            try:
-                webpage = self._download_webpage(
+                try:
-                    url, item_id, 'Downloading page %d' % page_num,
+                    webpage = download_page(base_url, page_num)
-                    query={'page': page_num})
+                except ExtractorError as e:
                    # Some sources may not be available via /videos page,
                    # trying to fallback to main page pagination (see [1])
                    # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
                    if is_404(e) and page_num == first_page and VIDEOS in base_url:
                        base_url = base_url.replace(VIDEOS, '')
                        webpage = download_page(base_url, page_num, fallback=True)
                    else:
                        raise
            except ExtractorError as e:
-                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+                if is_404(e) and page_num != first_page:
                    break
                raise
            page_entries = self._extract_entries(webpage, host)
            if not page_entries:
                break
-            entries.extend(page_entries)
+            for e in page_entries:
                yield e
            if not self._has_more(webpage):
                break
-        return self.playlist_result(orderedSet(entries), item_id)
+    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        host = mobj.group('host')
        item_id = mobj.group('id')
        self._login(host)
        return self.playlist_result(self._entries(url, host, item_id), item_id)
 class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
--- a/youtube_dlc/extractor/svt.py
+++ b/youtube_dlc/extractor/svt.py
@ -255,8 +255,10 @@ def _real_extract(self, url):
            svt_id = self._search_regex(
                (r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
                 r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
                 r'["\']videoSvtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)',
                 r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',
-                 r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
+                 r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
                 r'["\']svtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)'),
                webpage, 'video id')
        info_dict = self._extract_by_video_id(svt_id, webpage)
--- a/youtube_dlc/extractor/tv2.py
+++ b/youtube_dlc/extractor/tv2.py
@ -20,7 +20,7 @@
 class TV2IE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.tv2.no/v/916509/',
        'info_dict': {
            'id': '916509',
@ -33,7 +33,7 @@ class TV2IE(InfoExtractor):
            'view_count': int,
            'categories': list,
        },
-    }
+    }]
    _API_DOMAIN = 'sumo.tv2.no'
    _PROTOCOLS = ('HDS', 'HLS', 'DASH')
    _GEO_COUNTRIES = ['NO']
@ -42,6 +42,12 @@ def _real_extract(self, url):
        video_id = self._match_id(url)
        api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)
        asset = self._download_json(
            api_base + '.json', video_id,
            'Downloading metadata JSON')['asset']
        title = asset.get('subtitle') or asset['title']
        is_live = asset.get('live') is True
        formats = []
        format_urls = []
        for protocol in self._PROTOCOLS:
@ -81,7 +87,8 @@ def _real_extract(self, url):
                elif ext == 'm3u8':
                    if not data.get('drmProtected'):
                        formats.extend(self._extract_m3u8_formats(
-                            video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                            video_url, video_id, 'mp4',
                            'm3u8' if is_live else 'm3u8_native',
                            m3u8_id=format_id, fatal=False))
                elif ext == 'mpd':
                    formats.extend(self._extract_mpd_formats(
@ -99,11 +106,6 @@ def _real_extract(self, url):
            raise ExtractorError('This video is DRM protected.', expected=True)
        self._sort_formats(formats)
        asset = self._download_json(
            api_base + '.json', video_id,
            'Downloading metadata JSON')['asset']
        title = asset['title']
        thumbnails = [{
            'id': thumbnail.get('@type'),
            'url': thumbnail.get('url'),
@ -112,7 +114,7 @@ def _real_extract(self, url):
        return {
            'id': video_id,
            'url': video_url,
-            'title': title,
+            'title': self._live_title(title) if is_live else title,
            'description': strip_or_none(asset.get('description')),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(asset.get('createTime')),
@ -120,6 +122,7 @@ def _real_extract(self, url):
            'view_count': int_or_none(asset.get('views')),
            'categories': asset.get('keywords', '').split(','),
            'formats': formats,
            'is_live': is_live,
        }
@ -168,13 +171,13 @@ def _real_extract(self, url):
 class KatsomoIE(TV2IE):
-    _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv)\.fi/(?:#!/)?(?:[^/]+/[0-9a-z-]+-\d+/[0-9a-z-]+-|[^/]+/\d+/[^/]+/)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv(uutiset)?)\.fi/(?:sarja/[0-9a-z-]+-\d+/[0-9a-z-]+-|(?:#!/)?jakso/(?:\d+/[^/]+/)?|video/prog)(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
        'info_dict': {
            'id': '1181321',
            'ext': 'mp4',
-            'title': 'MTV Uutiset Live',
+            'title': 'Lahden Pelicans teki kovan ratkaisun – Ville Nieminen pihalle',
            'description': 'Päätöksen teki Pelicansin hallitus.',
            'timestamp': 1575116484,
            'upload_date': '20191130',
@ -186,7 +189,60 @@ class KatsomoIE(TV2IE):
            # m3u8 download
            'skip_download': True,
        },
-    }
+    }, {
        'url': 'http://www.katsomo.fi/#!/jakso/33001005/studio55-fi/658521/jukka-kuoppamaki-tekee-yha-lauluja-vaikka-lentokoneessa',
        'only_matching': True,
    }, {
        'url': 'https://www.mtvuutiset.fi/video/prog1311159',
        'only_matching': True,
    }, {
        'url': 'https://www.katsomo.fi/#!/jakso/1311159',
        'only_matching': True,
    }]
    _API_DOMAIN = 'api.katsomo.fi'
    _PROTOCOLS = ('HLS', 'MPD')
    _GEO_COUNTRIES = ['FI']
 class MTVUutisetArticleIE(InfoExtractor):
    """Extractor for mtvuutiset.fi article pages.

    The article JSON is fetched from the mtvuutiset API and every listed
    video of type ``katsomo`` or ``youtube`` is delegated to the extractor
    of the same (capitalized) name; the result is returned as a playlist.
    """

    # "www." is optional here, matching the sibling TV2IE / KatsomoIE
    # patterns; previously the group lacked the trailing "?" and the
    # prefix was therefore mandatory.
    _VALID_URL = r'https?://(?:www\.)?mtvuutiset\.fi/artikkeli/[^/]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mtvuutiset.fi/artikkeli/tallaisia-vaurioita-viking-amorellassa-on-useamman-osaston-alla-vetta/7931384',
        'info_dict': {
            'id': '1311159',
            'ext': 'mp4',
            'title': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
            'description': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
            'timestamp': 1600608966,
            'upload_date': '20200920',
            'duration': 153.7886666,
            'view_count': int,
            'categories': list,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # multiple Youtube embeds
        'url': 'https://www.mtvuutiset.fi/artikkeli/50-vuotta-subarun-vastaiskua/6070962',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist from the videos attached to the article at *url*."""
        article_id = self._match_id(url)
        article = self._download_json(
            'http://api.mtvuutiset.fi/mtvuutiset/api/json/' + article_id,
            article_id)

        def entries():
            # A missing or null "videos" field is treated as an empty list.
            for video in (article.get('videos') or []):
                video_type = video.get('videotype')
                video_url = video.get('url')
                # Skip entries without a URL or of a type other than
                # 'katsomo' / 'youtube'.
                if not (video_url and video_type in ('katsomo', 'youtube')):
                    continue
                # 'katsomo' -> 'Katsomo', 'youtube' -> 'Youtube': used as
                # the ie key of the extractor that handles the URL.
                yield self.url_result(
                    video_url, video_type.capitalize(), video.get('video_id'))

        return self.playlist_result(
            entries(), article_id, article.get('title'), article.get('description'))
--- a/youtube_dlc/extractor/tv4.py
+++ b/youtube_dlc/extractor/tv4.py
@ -17,7 +17,7 @@ class TV4IE(InfoExtractor):
            tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
            tv4play\.se/
            (?:
-                (?:program|barn)/(?:[^/]+/|(?:[^\?]+)\?video_id=)|
+                (?:program|barn)/(?:(?:[^/]+/){1,2}|(?:[^\?]+)\?video_id=)|
                iframe/video/|
                film/|
                sport/|
@ -65,6 +65,10 @@ class TV4IE(InfoExtractor):
        {
            'url': 'http://www.tv4play.se/program/farang/3922081',
            'only_matching': True,
        },
        {
            'url': 'https://www.tv4play.se/program/nyheterna/avsnitt/13315940',
            'only_matching': True,
        }
    ]
--- a/youtube_dlc/extractor/vidio.py
+++ b/youtube_dlc/extractor/vidio.py
@ -4,7 +4,13 @@
 import re
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
    int_or_none,
    parse_iso8601,
    str_or_none,
    strip_or_none,
    try_get,
 )
 class VidioIE(InfoExtractor):
@ -21,57 +27,63 @@ class VidioIE(InfoExtractor):
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 149,
            'like_count': int,
            'uploader': 'TWELVE Pic',
            'timestamp': 1444902800,
            'upload_date': '20151015',
            'uploader_id': 'twelvepictures',
            'channel': 'Cover Music Video',
            'channel_id': '280236',
            'view_count': int,
            'dislike_count': int,
            'comment_count': int,
            'tags': 'count:4',
        },
    }, {
        'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
        'only_matching': True,
    }]
    def _real_initialize(self):
        """Request an API key from vidio.com and keep it on the instance.

        Sends a request with an empty body to the ``/auth`` endpoint and
        stores the ``api_key`` field of the JSON reply in ``self._api_key``.
        A reply without that field raises ``KeyError``.
        """
        auth_response = self._download_json(
            'https://www.vidio.com/auth', None, data=b'')
        self._api_key = auth_response['api_key']
    def _real_extract(self, url):
        # NOTE(review): this span is a line-interleaved diff rendering, not
        # runnable code.  Lines prefixed "-" belong to the removed
        # implementation and lines prefixed "+" to its replacement; the two
        # are paired by position, not by meaning.
-        mobj = re.match(self._VALID_URL, url)
+        video_id, display_id = re.match(self._VALID_URL, url).groups()
-        video_id, display_id = mobj.group('id', 'display_id')
        # The "+" version queries the JSON API for this video and sends the
        # key stored in self._api_key as the X-API-KEY header.
+        data = self._download_json(
            'https://api.vidio.com/videos/' + video_id, display_id, headers={
                'Content-Type': 'application/vnd.api+json',
                'X-API-KEY': self._api_key,
            })
        video = data['videos'][0]
        title = video['title'].strip()
        # NOTE(review): the unprefixed webpage-scraping statements from here
        # down to the `formats` call overwrite `title` and compute
        # m3u8_url/duration/thumbnail values that no "+" line reads --
        # presumably removed lines whose "-" markers were lost in rendering;
        # confirm against the actual file.
        webpage = self._download_webpage(url, display_id)
        title = self._og_search_title(webpage)
        m3u8_url, duration, thumbnail = [None] * 3
        clips = self._parse_json(
            self._html_search_regex(
                r'data-json-clips\s*=\s*(["\'])(?P<data>\[.+?\])\1',
                webpage, 'video data', default='[]', group='data'),
            display_id, fatal=False)
        if clips:
            clip = clips[0]
            m3u8_url = clip.get('sources', [{}])[0].get('file')
            duration = clip.get('clip_duration')
            thumbnail = clip.get('image')
        m3u8_url = m3u8_url or self._search_regex(
            r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage, 'hls url', group='url')
        # The "+" version takes the HLS manifest URL from the first entry of
        # data['clips'] instead of the scraped m3u8_url.
        formats = self._extract_m3u8_formats(
-            m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native')
+            data['clips'][0]['hls_url'], display_id, 'mp4', 'm3u8_native')
        self._sort_formats(formats)
        # get_first(x) yields the first dict under data[x + 's'], or {} when
        # try_get finds none; get_count(x) reads video['total_' + x] through
        # int_or_none.
-        duration = int_or_none(duration or self._search_regex(
+        get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
-            r'data-video-duration=(["\'])(?P<duration>\d+)\1', webpage,
+        channel = get_first('channel')
-            'duration', fatal=False, group='duration'))
+        user = get_first('user')
-        thumbnail = thumbnail or self._og_search_thumbnail(webpage)
+        username = user.get('username')
-
+        get_count = lambda x: int_or_none(video.get('total_' + x))
        # NOTE(review): this scraped like_count is only referenced by a "-"
        # line in the result dict below; likely another removed statement.
        like_count = int_or_none(self._search_regex(
            (r'<span[^>]+data-comment-vote-count=["\'](\d+)',
             r'<span[^>]+class=["\'].*?\blike(?:__|-)count\b.*?["\'][^>]*>\s*(\d+)'),
            webpage, 'like count', fatal=False))
        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
-            'description': self._og_search_description(webpage),
+            'description': strip_or_none(video.get('description')),
-            'thumbnail': thumbnail,
+            'thumbnail': video.get('image_url_medium'),
-            'duration': duration,
+            'duration': int_or_none(video.get('duration')),
-            'like_count': like_count,
+            'like_count': get_count('likes'),
            'formats': formats,
            'uploader': user.get('name'),
            'timestamp': parse_iso8601(video.get('created_at')),
            'uploader_id': username,
            'uploader_url': 'https://www.vidio.com/@' + username if username else None,
            'channel': channel.get('name'),
            'channel_id': str_or_none(channel.get('id')),
            # get_count prepends 'total_', so this reads video['total_view_count'].
            'view_count': get_count('view_count'),
            'dislike_count': get_count('dislikes'),
            'comment_count': get_count('comments'),
            'tags': video.get('tag_list'),
        }
--- a/youtube_dlc/extractor/vlive.py
+++ b/youtube_dlc/extractor/vlive.py
@ -125,7 +125,7 @@ def _call_api(self, path_template, video_id, fields=None, limit=None):
                headers={'Referer': 'https://www.vlive.tv/'}, query=query)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-                self.raise_login_required(json.loads(e.cause.read().decode())['message'])
+                self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
            raise
    def _real_extract(self, url):
--- a/youtube_dlc/extractor/vtm.py
+++ b/youtube_dlc/extractor/vtm.py
@ -0,0 +1,62 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    parse_iso8601,
    try_get,
 )
 class VTMIE(InfoExtractor):
    """Extractor for vtm.be video pages identified by a ``~v<uuid>`` suffix.

    The page's UUID is resolved through a GraphQL endpoint and the result is
    returned as a ``url``-type entry that names ``Medialaan`` as its ie_key.
    """

    _VALID_URL = r'https?://(?:www\.)?vtm\.be/([^/?&#]+)~v(?P<id>[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})'
    _TEST = {
        'url': 'https://vtm.be/gast-vernielt-genkse-hotelkamer~ve7534523-279f-4b4d-a5c9-a33ffdbe23e1',
        'md5': '37dca85fbc3a33f2de28ceb834b071f8',
        'info_dict': {
            'id': '192445',
            'ext': 'mp4',
            'title': 'Gast vernielt Genkse hotelkamer',
            'timestamp': 1611060180,
            'upload_date': '20210119',
            'duration': 74,
            # TODO: fix url _type result processing
            # 'series': 'Op Interventie',
        }
    }

    def _real_extract(self, url):
        """Look up the video component for ``url`` and delegate playback.

        The UUID matched from ``url`` is interpolated into a GraphQL query
        that asks for the component's description, duration, myChannelsVideo
        number, program title, publication time and title.  The
        ``myChannelsVideo`` value is mandatory (a missing key raises
        ``KeyError``) because it forms the embed URL that is handed on; the
        remaining fields are optional metadata.
        """
        video_uuid = self._match_id(url)
        graphql_query = '''{
  getComponent(type: Video, uuid: "%s") {
    ... on Video {
      description
      duration
      myChannelsVideo
      program {
        title
      }
      publishedAt
      title
    }
  }
 }''' % video_uuid
        response = self._download_json(
            'https://omc4vm23offuhaxx6hekxtzspi.appsync-api.eu-west-1.amazonaws.com/graphql',
            video_uuid,
            query={'query': graphql_query},
            headers={'x-api-key': 'da2-lz2cab4tfnah3mve6wiye4n77e'})
        video = response['data']['getComponent']

        embed_url = 'http://mychannels.video/embed/%d' % video['myChannelsVideo']
        program_title = try_get(video, lambda x: x['program']['title'])

        return {
            '_type': 'url',
            'id': video_uuid,
            'title': video.get('title'),
            'url': embed_url,
            'description': video.get('description'),
            'timestamp': parse_iso8601(video.get('publishedAt')),
            'duration': int_or_none(video.get('duration')),
            'series': program_title,
            'ie_key': 'Medialaan',
        }
--- a/youtube_dlc/extractor/vvvvid.py
+++ b/youtube_dlc/extractor/vvvvid.py
@ -4,6 +4,7 @@
 import re
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 from ..utils import (
    ExtractorError,
    int_or_none,
@ -47,6 +48,22 @@ class VVVVIDIE(InfoExtractor):
        'params': {
            'skip_download': True,
        },
    }, {
        # video_type == 'video/youtube'
        'url': 'https://www.vvvvid.it/show/404/one-punch-man/406/486683/trailer',
        'md5': '33e0edfba720ad73a8782157fdebc648',
        'info_dict': {
            'id': 'RzmFKUDOUgw',
            'ext': 'mp4',
            'title': 'Trailer',
            'upload_date': '20150906',
            'description': 'md5:a5e802558d35247fee285875328c0b80',
            'uploader_id': 'BandaiVisual',
            'uploader': 'BANDAI NAMCO Arts Channel',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
        'only_matching': True
@ -154,12 +171,13 @@ def metadata_from_url(r_url):
                    if season_number:
                        info['season_number'] = int(season_number)
-        for quality in ('_sd', ''):
+        video_type = video_data.get('video_type')
        is_youtube = False
        for quality in ('', '_sd'):
            embed_code = video_data.get('embed_info' + quality)
            if not embed_code:
                continue
            embed_code = ds(embed_code)
            video_type = video_data.get('video_type')
            if video_type in ('video/rcs', 'video/kenc'):
                if video_type == 'video/kenc':
                    kenc = self._download_json(
@ -172,19 +190,28 @@ def metadata_from_url(r_url):
                    if kenc_message:
                        embed_code += '?' + ds(kenc_message)
                formats.extend(self._extract_akamai_formats(embed_code, video_id))
            elif video_type == 'video/youtube':
                info.update({
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'url': embed_code,
                })
                is_youtube = True
                break
            else:
                formats.extend(self._extract_wowza_formats(
                    'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
            metadata_from_url(embed_code)
-        self._sort_formats(formats)
+        if not is_youtube:
            self._sort_formats(formats)
            info['formats'] = formats
        metadata_from_url(video_data.get('thumbnail'))
        info.update(self._extract_common_video_info(video_data))
        info.update({
            'id': video_id,
            'title': title,
            'formats': formats,
            'duration': int_or_none(video_data.get('length')),
            'series': video_data.get('show_title'),
            'season_id': season_id,
--- a/youtube_dlc/extractor/zype.py
+++ b/youtube_dlc/extractor/zype.py
@ -87,11 +87,16 @@ def _real_extract(self, url):
                r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
                body, 'm3u8 url', group='url', default=None)
            if not m3u8_url:
-                source = self._parse_json(self._search_regex(
+                source = self._search_regex(
-                    r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body,
+                    r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body, 'source')
-                    'source'), video_id, js_to_json)
+
-                if source.get('integration') == 'verizon-media':
+                def get_attr(key):
-                    m3u8_url = 'https://content.uplynk.com/%s.m3u8' % source['id']
+                    return self._search_regex(
                        r'\b%s\s*:\s*([\'"])(?P<val>(?:(?!\1).)+)\1' % key,
                        source, key, group='val')
                if get_attr('integration') == 'verizon-media':
                    m3u8_url = 'https://content.uplynk.com/%s.m3u8' % get_attr('id')
            formats = self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
            text_tracks = self._search_regex(