diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index bf42510040..32c14aa851 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -21,15 +21,15 @@ ## Checklist - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dlc version **2020.10.26** +- [ ] I've verified that I'm running youtube-dlc version **2020.10.31** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -44,7 +44,7 @@ ## Verbose log [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dlc version 2020.10.26 + [debug] youtube-dlc version 2020.10.31 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 889005097b..fe1aade055 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -21,15 +21,15 @@ ## Checklist - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dlcc version **2020.10.26** +- [ ] I've verified that I'm running youtube-dlcc version **2020.10.31** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index e5d7143887..cddb81dda4 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -21,13 +21,13 @@ ## Checklist - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dlc version **2020.10.26** +- [ ] I've verified that I'm running youtube-dlc version **2020.10.31** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 9de52f98c9..920ae8dbca 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -21,16 +21,16 @@ ## Checklist - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dlc version **2020.10.26** +- [ ] I've verified that I'm running youtube-dlc version **2020.10.31** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -46,7 +46,7 @@ ## Verbose log [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dlc version 2020.10.26 + [debug] youtube-dlc version 2020.10.31 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 86fac96dde..7cc390f58b 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -21,13 +21,13 @@ ## Checklist - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dlc version **2020.10.26** +- [ ] I've verified that I'm running youtube-dlc version **2020.10.31** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/6_question.md b/.github/ISSUE_TEMPLATE/6_question.md index 034a9c5ace..3c3ae0f3b1 100644 --- a/.github/ISSUE_TEMPLATE/6_question.md +++ b/.github/ISSUE_TEMPLATE/6_question.md @@ -21,8 +21,8 @@ ## Checklist @@ -34,7 +34,7 @@ ## Checklist ## Question WRITE QUESTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md index 8f9bb2c332..3fe4d6968d 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md @@ -18,10 +18,10 @@ ## Checklist diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md index 9748afd4d8..aad8fa0541 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md @@ -19,10 +19,10 @@ ## Checklist diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md index f274e8aeb5..2fb82f8286 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md @@ -18,8 +18,8 @@ ## Checklist diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md index 788f1c9a15..b7bebf8ab7 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md @@ -18,11 +18,11 @@ ## Checklist diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md index 9b3b8c3bfe..99592f79de 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md @@ -19,8 +19,8 @@ ## Checklist diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8db7e92f24..f5d94dc490 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -57,7 +57,7 @@ jobs: id: sha2_file env: SHA2: ${{ hashFiles('youtube-dlc') }} - run: echo "::set-output name=sha2_unix::${env:SHA2}" + run: echo "::set-output name=sha2_unix::$SHA2" - name: Install dependencies for pypi run: | python -m pip install --upgrade pip @@ -98,12 +98,12 @@ jobs: upload_url: ${{ needs.build_unix.outputs.upload_url }} asset_path: ./dist/youtube-dlc.exe asset_name: youtube-dlc.exe - asset_content_type: application/octet-stream + asset_content_type: application/vnd.microsoft.portable-executable - name: Get SHA2-256SUMS for youtube-dlc.exe id: sha2_file_win env: - SHA2: ${{ hashFiles('dist/youtube-dlc.exe') }} - run: echo "::set-output name=sha2_windows::${env:SHA2}" + SHA2_win: ${{ hashFiles('dist/youtube-dlc.exe') }} + run: echo "::set-output name=sha2_windows::$SHA2_win" build_windows32: @@ -133,12 +133,12 @@ jobs: upload_url: ${{ needs.build_unix.outputs.upload_url }} asset_path: ./dist/youtube-dlc_x86.exe asset_name: youtube-dlc_x86.exe - asset_content_type: application/octet-stream + asset_content_type: application/vnd.microsoft.portable-executable - name: Get SHA2-256SUMS for youtube-dlc_x86.exe id: sha2_file_win32 env: - SHA2: ${{ hashFiles('dist/youtube-dlc_x86.exe') }} - run: echo "::set-output name=sha2_windows32::${env:SHA2}" + SHA2_win32: ${{ hashFiles('dist/youtube-dlc_x86.exe') }} + run: echo "::set-output name=sha2_windows32::$SHA2_win32" - name: Make SHA2-256SUMS file env: SHA2_WINDOWS: ${{ needs.build_windows.outputs.sha2_windows }} @@ -146,6 +146,18 @@ jobs: SHA2_UNIX: ${{ needs.build_unix.outputs.sha2_unix }} YTDLC_VERSION: ${{ needs.build_unix.outputs.ytdlc_version }} run: | - echo "$SHA2_WINDOWS youtube-dlc.exe" > SHA2-256SUMS - echo "$SHA2_WINDOWS32 youtube-dlc32.exe" > SHA2-256SUMS - echo "$SHA2_UNIX youtube-dlc" >> SHA2-256SUMS + echo "version:$YTDLC_VERSION" >> SHA2-256SUMS + echo "youtube-dlc.exe:$SHA2_WINDOWS" >> SHA2-256SUMS + echo "youtube-dlc32.exe:$SHA2_WINDOWS32" >> SHA2-256SUMS + echo "youtube-dlc:$SHA2_UNIX" >> SHA2-256SUMS + + - name: Upload 256SUMS file + id: upload-sums + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.build_unix.outputs.upload_url }} + asset_path: ./SHA2-256SUMS + asset_name: SHA2-256SUMS + asset_content_type: text/plain diff --git a/README.md b/README.md index af65e96068..08bddaa187 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![Build Status](https://travis-ci.com/blackjack4494/youtube-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/youtube-dlc) +[![Build Status](https://travis-ci.com/blackjack4494/yt-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/yt-dlc) [![PyPi](https://img.shields.io/pypi/v/youtube-dlc.svg)](https://pypi.org/project/youtube-dlc) [![Downloads](https://pepy.tech/badge/youtube-dlc)](https://pepy.tech/project/youtube-dlc) @@ -7,7 +7,7 @@ youtube-dlc - download videos from youtube.com or other video platforms. -youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://github.com/ytdl-org/youtube-dl/issues/26462) +youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://web.archive.org/web/20201014194602/https://github.com/ytdl-org/youtube-dl/issues/26462) - [INSTALLATION](#installation) - [DESCRIPTION](#description) @@ -36,6 +36,7 @@ - [VIDEO SELECTION](#video-selection-1) # INSTALLATION +[How to update](#update) **All Platforms** Preferred way using pip: @@ -46,16 +47,16 @@ # INSTALLATION **UNIX** (Linux, macOS, etc.) Using wget: - sudo wget https://github.com/blackjack4494/youtube-dlc/releases/latest/download/youtube-dlc -O /usr/local/bin/youtube-dlc + sudo wget https://github.com/blackjack4494/yt-dlc/releases/latest/download/youtube-dlc -O /usr/local/bin/youtube-dlc sudo chmod a+rx /usr/local/bin/youtube-dlc Using curl: - sudo curl -L https://github.com/blackjack4494/youtube-dlc/releases/latest/download/youtube-dlc -o /usr/local/bin/youtube-dlc + sudo curl -L https://github.com/blackjack4494/yt-dlc/releases/latest/download/youtube-dlc -o /usr/local/bin/youtube-dlc sudo chmod a+rx /usr/local/bin/youtube-dlc -**Windows** users can download [youtube-dlc.exe](https://github.com/blackjack4494/youtube-dlc/releases/latest/download/youtube-dlc.exe) (**do not** put in `C:\Windows\System32`!). +**Windows** users can download [youtube-dlc.exe](https://github.com/blackjack4494/yt-dlc/releases/latest/download/youtube-dlc.exe) (**do not** put in `C:\Windows\System32`!). **Compile** To build the Windows executable yourself (without version info!) @@ -77,6 +78,10 @@ # INSTALLATION make +# UPDATE +**DO NOT UPDATE using `-U` !** instead download binaries again or when installed with pip use a described above when installing. +I will add some memorable short links to the binaries so you can download them easier. + # DESCRIPTION **youtube-dlc** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like. diff --git a/docs/supportedsites.md b/docs/supportedsites.md index c46d122ffa..3b98e7a124 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -582,6 +582,7 @@ # Supported sites - **niconico**: ニコニコ動画 - **NiconicoPlaylist** - **Nintendo** + - **Nitter** - **njoy**: N-JOY - **njoy:embed** - **NJPWWorld**: 新日本プロレスワールド diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py index fc351db0dc..dd55ba0f23 100644 --- a/youtube_dlc/YoutubeDL.py +++ b/youtube_dlc/YoutubeDL.py @@ -801,7 +801,7 @@ def add_extra_info(info_dict, extra_info): for key, value in extra_info.items(): info_dict.setdefault(key, value) - def extract_info(self, url, download=True, ie_key=None, extra_info={}, + def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={}, process=True, force_generic_extractor=False): ''' Returns a list with a dictionary for each video we find. @@ -836,6 +836,11 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={}, '_type': 'compat_list', 'entries': ie_result, } + if info_dict: + if info_dict.get('id'): + ie_result['id'] = info_dict['id'] + if info_dict.get('title'): + ie_result['title'] = info_dict['title'] self.add_default_extra_info(ie_result, ie, url) if process: return self.process_ie_result(ie_result, download, extra_info) @@ -898,7 +903,7 @@ def process_ie_result(self, ie_result, download=True, extra_info={}): # We have to add extra_info to the results because it may be # contained in a playlist return self.extract_info(ie_result['url'], - download, + download, info_dict=ie_result, ie_key=ie_result.get('ie_key'), extra_info=extra_info) elif result_type == 'url_transparent': @@ -1852,13 +1857,13 @@ def ensure_dir_exists(path): self.report_error('Cannot write annotations file: ' + annofn) return - def dl(name, info): + def dl(name, info, subtitle=False): fd = get_suitable_downloader(info, self.params)(self, self.params) for ph in self._progress_hooks: fd.add_progress_hook(ph) if self.params.get('verbose'): self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) - return fd.download(name, info) + return fd.download(name, info, subtitle) subtitles_are_requested = any([self.params.get('writesubtitles', False), self.params.get('writeautomaticsub')]) @@ -1867,7 +1872,7 @@ def dl(name, info): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE subtitles = info_dict['requested_subtitles'] - ie = self.get_info_extractor(info_dict['extractor_key']) + # ie = self.get_info_extractor(info_dict['extractor_key']) for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) @@ -1886,6 +1891,8 @@ def dl(name, info): return else: try: + dl(sub_filename, sub_info, subtitle=True) + ''' if self.params.get('sleep_interval_subtitles', False): dl(sub_filename, sub_info) else: @@ -1893,6 +1900,7 @@ def dl(name, info): sub_info['url'], info_dict['id'], note=False).read() with io.open(encodeFilename(sub_filename), 'wb') as subfile: subfile.write(sub_data) + ''' except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.report_warning('Unable to download subtitle for "%s": %s' % (sub_lang, error_to_compat_str(err))) diff --git a/youtube_dlc/downloader/common.py b/youtube_dlc/downloader/common.py index 31c2864584..460364a0b0 100644 --- a/youtube_dlc/downloader/common.py +++ b/youtube_dlc/downloader/common.py @@ -326,7 +326,7 @@ def report_unable_to_resume(self): """Report it was impossible to resume download.""" self.to_screen('[download] Unable to resume') - def download(self, filename, info_dict): + def download(self, filename, info_dict, subtitle=False): """Download to a filename using the info from info_dict Return True on success and False otherwise """ @@ -353,16 +353,22 @@ def download(self, filename, info_dict): }) return True - min_sleep_interval = self.params.get('sleep_interval') - if min_sleep_interval: - max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) - sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) + if subtitle is False: + min_sleep_interval = self.params.get('sleep_interval') + if min_sleep_interval: + max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) + sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) + self.to_screen( + '[download] Sleeping %s seconds...' % ( + int(sleep_interval) if sleep_interval.is_integer() + else '%.2f' % sleep_interval)) + time.sleep(sleep_interval) + else: + sleep_interval_sub = self.params.get('sleep_interval_subtitles') self.to_screen( '[download] Sleeping %s seconds...' % ( - int(sleep_interval) if sleep_interval.is_integer() - else '%.2f' % sleep_interval)) - time.sleep(sleep_interval) - + int(sleep_interval_sub))) + time.sleep(sleep_interval_sub) return self.real_download(filename, info_dict) def real_download(self, filename, info_dict): diff --git a/youtube_dlc/extractor/adobepass.py b/youtube_dlc/extractor/adobepass.py index 38dca1b0a4..649f9940f3 100644 --- a/youtube_dlc/extractor/adobepass.py +++ b/youtube_dlc/extractor/adobepass.py @@ -1438,6 +1438,13 @@ def extract_redirect_url(html, url=None, fatal=False): provider_redirect_page, 'oauth redirect') self._download_webpage( oauth_redirect_url, video_id, 'Confirming auto login') + elif 'automatically signed in with' in provider_redirect_page: + # Seems like comcast is rolling up new way of automatically signing customers + oauth_redirect_url = self._html_search_regex( + r'continue:\s*"(https://oauth.xfinity.com/oauth/authorize\?.+)"', provider_redirect_page, + 'oauth redirect (signed)') + # Just need to process the request. No useful data comes back + self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login') else: if '
[^/?#&]+)' _TESTS = [{ - 'url': 'http://youtube-dlc.bandcamp.com/track/youtube-dlc-test-song', + 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', 'md5': 'c557841d5e50261777a6585648adf439', 'info_dict': { 'id': '1812978515', @@ -85,52 +123,32 @@ class BandcampIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) title = mobj.group('title') + url_track_title = title webpage = self._download_webpage(url, title) thumbnail = self._html_search_meta('og:image', webpage, default=None) - track_id = None - track = None - track_number = None - duration = None + json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", url_track_title) + json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", url_track_title) - formats = [] - trackinfo_block = self._html_search_regex( - r'trackinfo(?:["\']|"):\[\s*({.+?})\s*\],(?:["\']|")', - webpage, 'track info', default='{}') + json_tracks = json_tralbum.get('trackinfo') + if not json_tracks: + raise ExtractorError('Could not extract track') - track_info = self._parse_json(trackinfo_block, title) - if track_info: - file_ = track_info.get('file') - if isinstance(file_, dict): - for format_id, format_url in file_.items(): - if not url_or_none(format_url): - continue - ext, abr_str = format_id.split('-', 1) - formats.append({ - 'format_id': format_id, - 'url': self._proto_relative_url(format_url, 'http:'), - 'ext': ext, - 'vcodec': 'none', - 'acodec': ext, - 'abr': int_or_none(abr_str), - }) + track = self._parse_json_track(json_tracks[0]) + artist = json_tralbum.get('artist') + album_title = json_embed.get('album_title') - track_id = str_or_none(track_info.get('track_id') or track_info.get('id')) - track_number = int_or_none(track_info.get('track_num')) - duration = float_or_none(track_info.get('duration')) + json_album = json_tralbum.get('packages') + if json_album: + json_album = json_album[0] + album_publish_date = json_album.get('album_publish_date') + album_release_date = json_album.get('album_release_date') + else: + album_publish_date = None + album_release_date = json_tralbum.get('album_release_date') - def extract(key): - data = self._html_search_regex( - r',(["\']|")%s\1:\1(?P(?:\\\1|((?!\1).))+)\1' % key, - webpage, key, default=None, group='value') - return data.replace(r'\"', '"').replace('\\\\', '\\') if data else data - - track = extract('title') - artist = extract('artist') - album = extract('album_title') - timestamp = unified_timestamp( - extract('publish_date') or extract('album_publish_date')) - release_date = unified_strdate(extract('album_release_date')) + timestamp = unified_timestamp(json_tralbum.get('current', {}).get('publish_date') or album_publish_date) + release_date = unified_strdate(album_release_date) download_link = self._search_regex( r'freeDownloadPage(?:["\']|"):\s*(["\']|")(?P(?:(?!\1).)+)\1', webpage, @@ -155,8 +173,6 @@ def extract(key): if info: downloads = info.get('downloads') if isinstance(downloads, dict): - if not track: - track = info.get('title') if not artist: artist = info.get('artist') if not thumbnail: @@ -190,7 +206,7 @@ def extract(key): retry_url = url_or_none(stat.get('retry_url')) if not retry_url: continue - formats.append({ + track['formats'].append({ 'url': self._proto_relative_url(retry_url, 'http:'), 'ext': download_formats.get(format_id), 'format_id': format_id, @@ -199,32 +215,28 @@ def extract(key): 'vcodec': 'none', }) - self._sort_formats(formats) + self._sort_formats(track['formats']) - title = '%s - %s' % (artist, track) if artist else track - - if not duration: - duration = float_or_none(self._html_search_meta( - 'duration', webpage, default=None)) + title = '%s - %s' % (artist, track.get('title')) if artist else track.get('title') return { - 'id': track_id, - 'title': title, - 'thumbnail': thumbnail, - 'uploader': artist, - 'timestamp': timestamp, - 'release_date': release_date, - 'duration': duration, - 'track': track, - 'track_number': track_number, - 'track_id': track_id, + 'album': album_title, 'artist': artist, - 'album': album, - 'formats': formats, + 'duration': track['duration'], + 'formats': track['formats'], + 'id': track['id'], + 'release_date': release_date, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'title': title, + 'track': track['title'], + 'track_id': track['id'], + 'track_number': track['number'], + 'uploader': artist } -class BandcampAlbumIE(InfoExtractor): +class BandcampAlbumIE(BandcampBaseIE): IE_NAME = 'Bandcamp:album' _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com(?:/album/(?P[^/?#&]+))?' @@ -305,34 +317,32 @@ def _real_extract(self, url): album_id = mobj.group('album_id') playlist_id = album_id or uploader_id webpage = self._download_webpage(url, playlist_id) - track_elements = re.findall( - r'(?s)]*>(.*?]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)', webpage) - if not track_elements: - raise ExtractorError('The page doesn\'t contain any tracks') + + json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", playlist_id) + json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", playlist_id) + + json_tracks = json_tralbum.get('trackinfo') + if not json_tracks: + raise ExtractorError('Could not extract album tracks') + + album_title = json_embed.get('album_title') + # Only tracks with duration info have songs + tracks = [self._parse_json_track(track) for track in json_tracks] entries = [ self.url_result( - compat_urlparse.urljoin(url, t_path), - ie=BandcampIE.ie_key(), - video_title=self._search_regex( - r']+\bitemprop=["\']name["\'][^>]*>([^<]+)', - elem_content, 'track title', fatal=False)) - for elem_content, t_path in track_elements - if self._html_search_meta('duration', elem_content, default=None)] - - title = self._html_search_regex( - r'album_title\s*(?:"|["\']):\s*("|["\'])(?P(?:\\\1|((?!\1).))+)\1', - webpage, 'title', fatal=False, group='album') - - if title: - title = title.replace(r'\"', '"') + compat_urlparse.urljoin(url, track['title_link']), + ie=BandcampIE.ie_key(), video_id=track['id'], + video_title=track['title']) + for track in tracks + if track.get('duration')] return { '_type': 'playlist', 'uploader_id': uploader_id, 'id': playlist_id, - 'title': title, - 'entries': entries, + 'title': album_title, + 'entries': entries } diff --git a/youtube_dlc/extractor/brightcove.py b/youtube_dlc/extractor/brightcove.py index 2aa9f4782e..638673c31e 100644 --- a/youtube_dlc/extractor/brightcove.py +++ b/youtube_dlc/extractor/brightcove.py @@ -471,12 +471,17 @@ def _parse_brightcove_metadata(self, json_data, video_id, headers={}): title = json_data['name'].strip() formats = [] + sources_num = len(json_data.get('sources')) + key_systems_present = 0 for source in json_data.get('sources', []): container = source.get('container') ext = mimetype2ext(source.get('type')) src = source.get('src') - # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object - if ext == 'ism' or container == 'WVM' or source.get('key_systems'): + # https://apis.support.brightcove.com/playback/references/playback-api-video-fields-reference.html + if source.get('key_systems'): + key_systems_present += 1 + continue + elif ext == 'ism' or container == 'WVM': continue elif ext == 'm3u8' or container == 'M2TS': if not src: @@ -533,6 +538,10 @@ def build_format_id(kind): 'format_id': build_format_id('rtmp'), }) formats.append(f) + + if sources_num == key_systems_present: + raise ExtractorError('This video is DRM protected', expected=True) + if not formats: # for sonyliv.com DRM protected videos s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl') diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py index d31edd7c87..666134d868 100644 --- a/youtube_dlc/extractor/extractors.py +++ b/youtube_dlc/extractor/extractors.py @@ -751,6 +751,7 @@ from .ninegag import NineGagIE from .ninenow import NineNowIE from .nintendo import NintendoIE +from .nitter import NitterIE from .njpwworld import NJPWWorldIE from .nobelprize import NobelPrizeIE from .noco import NocoIE @@ -1037,6 +1038,10 @@ SkyNewsIE, SkySportsIE, ) +from .skyitalia import ( + SkyArteItaliaIE, + SkyItaliaIE, +) from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE diff --git a/youtube_dlc/extractor/mtv.py b/youtube_dlc/extractor/mtv.py index 6b3658397f..04cc95b6a4 100644 --- a/youtube_dlc/extractor/mtv.py +++ b/youtube_dlc/extractor/mtv.py @@ -289,7 +289,7 @@ def _extract_new_triforce_mgid(self, webpage, url='', video_id=None): return mgid - def _extract_mgid(self, webpage, url, data_zone=None): + def _extract_mgid(self, webpage, url, title=None, data_zone=None): try: # the url can be http://media.mtvnservices.com/fb/{mgid}.swf # or http://media.mtvnservices.com/{mgid} @@ -300,7 +300,8 @@ def _extract_mgid(self, webpage, url, data_zone=None): except RegexNotFoundError: mgid = None - title = self._match_id(url) + if not title: + title = url_basename(url) try: window_data = self._parse_json(self._search_regex( @@ -336,7 +337,7 @@ def _extract_mgid(self, webpage, url, data_zone=None): def _real_extract(self, url): title = url_basename(url) webpage = self._download_webpage(url, title) - mgid = self._extract_mgid(webpage, url) + mgid = self._extract_mgid(webpage, url, title=title) videos_info = self._get_videos_info(mgid, url=url) return videos_info diff --git a/youtube_dlc/extractor/netzkino.py b/youtube_dlc/extractor/netzkino.py index aec3026b12..3d1a06d0b5 100644 --- a/youtube_dlc/extractor/netzkino.py +++ b/youtube_dlc/extractor/netzkino.py @@ -13,17 +13,16 @@ class NetzkinoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P[^/]+)/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/[^/]+/(?P[^/]+)' - _TEST = { - 'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond', + _TESTS = [{ + 'url': 'https://www.netzkino.de/#!/scifikino/rakete-zum-mond', 'md5': '92a3f8b76f8d7220acce5377ea5d4873', 'info_dict': { 'id': 'rakete-zum-mond', 'ext': 'mp4', - 'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)', - 'comments': 'mincount:3', - 'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28', + 'title': 'Rakete zum Mond \u2013 Jules Verne', + 'description': 'md5:f0a8024479618ddbfa450ff48ffa6c60', 'upload_date': '20120813', 'thumbnail': r're:https?://.*\.jpg$', 'timestamp': 1344858571, @@ -32,17 +31,30 @@ class NetzkinoIE(InfoExtractor): 'params': { 'skip_download': 'Download only works from Germany', } - } + }, { + 'url': 'https://www.netzkino.de/#!/filme/dr-jekyll-mrs-hyde-2', + 'md5': 'c7728b2dadd04ff6727814847a51ef03', + 'info_dict': { + 'id': 'dr-jekyll-mrs-hyde-2', + 'ext': 'mp4', + 'title': 'Dr. Jekyll & Mrs. Hyde 2', + 'description': 'md5:c2e9626ebd02de0a794b95407045d186', + 'upload_date': '20190130', + 'thumbnail': r're:https?://.*\.jpg$', + 'timestamp': 1548849437, + 'age_limit': 18, + }, + 'params': { + 'skip_download': 'Download only works from Germany', + } + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - category_id = mobj.group('category') video_id = mobj.group('id') - api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id - api_info = self._download_json(api_url, video_id) - info = next( - p for p in api_info['posts'] if p['slug'] == video_id) + api_url = 'https://api.netzkino.de.simplecache.net/capi-2.0a/movies/%s.json?d=www' % video_id + info = self._download_json(api_url, video_id) custom_fields = info['custom_fields'] production_js = self._download_webpage( @@ -67,23 +79,12 @@ def _real_extract(self, url): } for key, tpl in templates.items()] self._sort_formats(formats) - comments = [{ - 'timestamp': parse_iso8601(c.get('date'), delimiter=' '), - 'id': c['id'], - 'author': c['name'], - 'html': c['content'], - 'parent': 'root' if c.get('parent', 0) == 0 else c['parent'], - } for c in info.get('comments', [])] - return { 'id': video_id, 'formats': formats, - 'comments': comments, 'title': info['title'], 'age_limit': int_or_none(custom_fields.get('FSK')[0]), 'timestamp': parse_iso8601(info.get('date'), delimiter=' '), 'description': clean_html(info.get('content')), 'thumbnail': info.get('thumbnail'), - 'playlist_title': api_info.get('title'), - 'playlist_id': category_id, } diff --git a/youtube_dlc/extractor/newgrounds.py b/youtube_dlc/extractor/newgrounds.py index 82e7cf5221..b9f01235f2 100644 --- a/youtube_dlc/extractor/newgrounds.py +++ b/youtube_dlc/extractor/newgrounds.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, extract_attributes, int_or_none, parse_duration, @@ -20,22 +21,22 @@ class NewgroundsIE(InfoExtractor): 'info_dict': { 'id': '549479', 'ext': 'mp3', - 'title': 'B7 - BusMode', + 'title': 'Burn7 - B7 - BusMode', 'uploader': 'Burn7', 'timestamp': 1378878540, 'upload_date': '20130911', 'duration': 143, }, }, { - 'url': 'https://www.newgrounds.com/portal/view/673111', - 'md5': '3394735822aab2478c31b1004fe5e5bc', + 'url': 'https://www.newgrounds.com/portal/view/1', + 'md5': 'fbfb40e2dc765a7e830cb251d370d981', 'info_dict': { - 'id': '673111', + 'id': '1', 'ext': 'mp4', - 'title': 'Dancin', - 'uploader': 'Squirrelman82', - 'timestamp': 1460256780, - 'upload_date': '20160410', + 'title': 'Brian-Beaton - Scrotum 1', + 'uploader': 'Brian-Beaton', + 'timestamp': 955064100, + 'upload_date': '20000406', }, }, { # source format unavailable, additional mp4 formats @@ -43,7 +44,7 @@ class NewgroundsIE(InfoExtractor): 'info_dict': { 'id': '689400', 'ext': 'mp4', - 'title': 'ZTV News Episode 8', + 'title': 'Bennettthesage - ZTV News Episode 8', 'uploader': 'BennettTheSage', 'timestamp': 1487965140, 'upload_date': '20170224', @@ -55,42 +56,73 @@ class NewgroundsIE(InfoExtractor): def _real_extract(self, url): media_id = self._match_id(url) - + formats = [] + uploader = None webpage = self._download_webpage(url, media_id) title = self._html_search_regex( r'([^>]+)', webpage, 'title') - media_url = self._parse_json(self._search_regex( - r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id) + media_url_string = self._search_regex( + r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None, fatal=False) - formats = [{ - 'url': media_url, - 'format_id': 'source', - 'quality': 1, - }] + if media_url_string: + media_url = self._parse_json(media_url_string, media_id) + formats = [{ + 'url': media_url, + 'format_id': 'source', + 'quality': 1, + }] - max_resolution = int_or_none(self._search_regex( - r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution', - default=None)) - if max_resolution: - url_base = media_url.rpartition('.')[0] - for resolution in (360, 720, 1080): - if resolution > max_resolution: - break - formats.append({ - 'url': '%s.%dp.mp4' % (url_base, resolution), - 'format_id': '%dp' % resolution, - 'height': resolution, - }) + max_resolution = int_or_none(self._search_regex( + r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution', + default=None)) + if max_resolution: + url_base = media_url.rpartition('.')[0] + for resolution in (360, 720, 1080): + if resolution > max_resolution: + break + formats.append({ + 'url': '%s.%dp.mp4' % (url_base, resolution), + 'format_id': '%dp' % resolution, + 'height': resolution, + }) + else: + video_id = int_or_none(self._search_regex( + r'data-movie-id=\\"([0-9]+)\\"', webpage, '')) + if not video_id: + raise ExtractorError('Could not extract media data') + + url_video_data = 'https://www.newgrounds.com/portal/video/%s' % video_id + headers = { + 'Accept': 'application/json', + 'Referer': url, + 'X-Requested-With': 'XMLHttpRequest' + } + json_video = self._download_json(url_video_data, video_id, headers=headers, fatal=False) + if not json_video: + raise ExtractorError('Could not fetch media data') + + uploader = json_video.get('author') + title = json_video.get('title') + media_formats = json_video.get('sources', []) + for media_format in media_formats: + media_sources = media_formats[media_format] + for source in media_sources: + formats.append({ + 'format_id': media_format, + 'quality': int_or_none(media_format[:-1]), + 'url': source.get('src') + }) self._check_formats(formats, media_id) self._sort_formats(formats) - uploader = self._html_search_regex( - (r'(?s)]*>(.+?).*?\s*Author\s*', - r'(?:Author|Writer)\s*]+>([^<]+)'), webpage, 'uploader', - fatal=False) + if not uploader: + uploader = self._html_search_regex( + (r'(?s)]*>(.+?).*?\s*(?:Author|Artist)\s*', + r'(?:Author|Writer)\s*]+>([^<]+)'), webpage, 'uploader', + fatal=False) timestamp = unified_timestamp(self._html_search_regex( (r'
\s*Uploaded\s*
\s*
([^<]+
\s*
[^<]+)', @@ -109,6 +141,9 @@ def _real_extract(self, url): if '
Song' in webpage: formats[0]['vcodec'] = 'none' + if uploader: + title = "%s - %s" % (uploader, title) + return { 'id': media_id, 'title': title, diff --git a/youtube_dlc/extractor/nitter.py b/youtube_dlc/extractor/nitter.py new file mode 100644 index 0000000000..3191543ed5 --- /dev/null +++ b/youtube_dlc/extractor/nitter.py @@ -0,0 +1,167 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + parse_count, + unified_strdate, + unified_timestamp, + remove_end, + determine_ext, +) +import re + + +class NitterIE(InfoExtractor): + # Taken from https://github.com/zedeus/nitter/wiki/Instances + INSTANCES = ('nitter.net', + 'nitter.snopyta.org', + 'nitter.42l.fr', + 'nitter.nixnet.services', + 'nitter.13ad.de', + 'nitter.pussthecat.org', + 'nitter.mastodont.cat', + 'nitter.dark.fail', + 'nitter.tedomum.net', + 'nitter.cattube.org', + 'nitter.fdn.fr', + 'nitter.1d4.us', + 'nitter.kavin.rocks', + 'tweet.lambda.dance', + 'nitter.cc', + 'nitter.weaponizedhumiliation.com', + '3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion', + 'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion', + 'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion') + + _INSTANCES_RE = '(?:' + '|'.join([re.escape(instance) for instance in INSTANCES]) + ')' + _VALID_URL = r'https?://%(instance)s/(?P.+)/status/(?P[0-9]+)(#.)?' % {'instance': _INSTANCES_RE} + current_instance = INSTANCES[0] # the test and official instance + _TESTS = [ + { + # GIF (wrapped in mp4) + 'url': 'https://' + current_instance + '/firefox/status/1314279897502629888#m', + 'info_dict': { + 'id': '1314279897502629888', + 'ext': 'mp4', + 'title': 'Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet', + 'description': 'You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Firefox 🔥', + 'uploader_id': 'firefox', + 'uploader_url': 'https://' + current_instance + '/firefox', + 'upload_date': '20201008', + 'timestamp': 1602183720, + }, + }, { # normal video + 'url': 'https://' + current_instance + '/Le___Doc/status/1299715685392756737#m', + 'info_dict': { + 'id': '1299715685392756737', + 'ext': 'mp4', + 'title': 'Le Doc - "Je ne prédis jamais rien" D Raoult, Août 2020...', + 'description': '"Je ne prédis jamais rien" D Raoult, Août 2020...', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Le Doc', + 'uploader_id': 'Le___Doc', + 'uploader_url': 'https://' + current_instance + '/Le___Doc', + 'upload_date': '20200829', + 'timestamp': 1598711341, + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + }, + }, { # video embed in a "Streaming Political Ads" box + 'url': 'https://' + current_instance + '/mozilla/status/1321147074491092994#m', + 'info_dict': { + 'id': '1321147074491092994', + 'ext': 'mp4', + 'title': "Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds", + 'description': "Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds", + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Mozilla', + 'uploader_id': 'mozilla', + 'uploader_url': 'https://' + current_instance + '/mozilla', + 'upload_date': '20201027', + 'timestamp': 1603820982 + }, + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + parsed_url = compat_urlparse.urlparse(url) + base_url = parsed_url.scheme + '://' + parsed_url.netloc + + self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on') + webpage = self._download_webpage(url, video_id) + + video_url = base_url + self._html_search_regex(r'(?:]+data-url|]+src)="([^"]+)"', webpage, 'video url') + ext = determine_ext(video_url) + + if ext == 'unknown_video': + formats = self._extract_m3u8_formats(video_url, video_id, ext='mp4') + else: + formats = [{ + 'url': video_url, + 'ext': ext + }] + + title = ( + self._og_search_description(webpage).replace('\n', ' ') + or self._html_search_regex(r'
]+title="([^"]+)"', webpage, 'uploader name', fatal=False)) + + if uploader_id: + uploader_url = base_url + '/' + uploader_id + + uploader = self._html_search_regex(r']+title="([^"]+)"', webpage, 'uploader name', fatal=False) + + if uploader: + title = uploader + ' - ' + title + + view_count = parse_count(self._html_search_regex(r']+class="icon-play[^>]*>\s([^<]+)
', webpage, 'view count', fatal=False)) + like_count = parse_count(self._html_search_regex(r']+class="icon-heart[^>]*>\s([^<]+)', webpage, 'like count', fatal=False)) + repost_count = parse_count(self._html_search_regex(r']+class="icon-retweet[^>]*>\s([^<]+)', webpage, 'repost count', fatal=False)) + comment_count = parse_count(self._html_search_regex(r']+class="icon-comment[^>]*>\s([^<]+)', webpage, 'repost count', fatal=False)) + + thumbnail = base_url + (self._html_search_meta('og:image', webpage, 'thumbnail url') + or self._html_search_regex(r']+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)) + + thumbnail = remove_end(thumbnail, '%3Asmall') # if parsed with regex, it should contain this + + thumbnails = [] + thumbnail_ids = ('thumb', 'small', 'large', 'medium', 'orig') + for id in thumbnail_ids: + thumbnails.append({ + 'id': id, + 'url': thumbnail + '%3A' + id, + }) + + date = self._html_search_regex(r']+class="tweet-date"[^>]*>]+title="([^"]+)"', webpage, 'upload date', fatal=False) + upload_date = unified_strdate(date) + timestamp = unified_timestamp(date) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'uploader': uploader, + 'timestamp': timestamp, + 'uploader_id': uploader_id, + 'uploader_url': uploader_url, + 'view_count': view_count, + 'like_count': like_count, + 'repost_count': repost_count, + 'comment_count': comment_count, + 'formats': formats, + 'thumbnails': thumbnails, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + } diff --git a/youtube_dlc/extractor/skyitalia.py b/youtube_dlc/extractor/skyitalia.py new file mode 100644 index 0000000000..3c7bd465df --- /dev/null +++ b/youtube_dlc/extractor/skyitalia.py @@ -0,0 +1,119 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class SkyItaliaBaseIE(InfoExtractor): + _GET_VIDEO_DATA = 'https://apid.sky.it/vdp/v1/getVideoData?token={token}&caller=sky&rendition=web&id={id}' + _RES = { + 'low': [426, 240], + 'med': [640, 360], + 'high': [854, 480], + 'hd': [1280, 720] + } + + def _extract_video_id(self, url): + webpage = self._download_webpage(url, 'skyitalia') + video_id = self._html_search_regex( + [r'data-videoid=\"(\d+)\"', + r'http://player\.sky\.it/social\?id=(\d+)\&'], + webpage, 'video_id') + if video_id: + return video_id + raise ExtractorError('Video ID not found.') + + def _get_formats(self, video_id, token): + data_url = self._GET_VIDEO_DATA.replace('{id}', video_id) + data_url = data_url.replace('{token}', token) + video_data = self._parse_json( + self._download_webpage(data_url, video_id), + video_id) + + formats = [] + for q, r in self._RES.items(): + key = 'web_%s_url' % q + if key not in video_data: + continue + formats.append({ + 'url': video_data.get(key), + 'format_id': q, + 'width': r[0], + 'height': r[1] + }) + + self._sort_formats(formats) + title = video_data.get('title') + thumb = video_data.get('thumb') + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumb, + 'formats': formats + } + + def _real_extract(self, url): + video_id = self._match_id(url) + if video_id == 'None': + video_id = self._extract_video_id(url) + return self._get_formats(video_id, self._TOKEN) + + +class SkyItaliaIE(SkyItaliaBaseIE): + IE_NAME = 'sky.it' + _VALID_URL = r'''(?x)https?:// + (?Psport|tg24|video) + \.sky\.it/(?:.+?) + (?P[0-9]{6})? + (?:$|\?)''' + + _TESTS = [{ + 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162', + 'md5': '9c03b590b06e5952d8051f0e02b0feca', + 'info_dict': { + 'id': '616162', + 'ext': 'mp4', + 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg', + } + }, { + 'url': 'https://sport.sky.it/motogp/2020/09/18/motogp-gp-emilia-romagna-misano-2020-prove-libere-diretta', + 'md5': '9c03b590b06e5952d8051f0e02b0feca', + 'info_dict': { + 'id': '616162', + 'ext': 'mp4', + 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg', + } + }, { + 'url': 'https://tg24.sky.it/salute-e-benessere/2020/09/18/coronavirus-vaccino-ue-sanofi', + 'md5': 'caa25e62dadb529bc5e0b078da99f854', + 'info_dict': { + 'id': '615904', + 'ext': 'mp4', + 'title': 'Covid-19, al Buzzi di Milano tamponi drive-in per studenti', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/17/1600351405841_error-coronavirus-al-buzzi-di-milano-tamponi_thumbnail_1.jpg', + } + }, { + 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api', + 'only_matching': True, + }] + _TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk' + + +class SkyArteItaliaIE(SkyItaliaBaseIE): + IE_NAME = 'arte.sky.it' + _VALID_URL = r'https?://arte\.sky\.it/video/.+?(?P[0-9]{6})?$' + _TEST = { + 'url': 'https://arte.sky.it/video/federico-fellini-maestri-cinema/', + 'md5': '2f22513a89f45142f2746f878d690647', + 'info_dict': { + 'id': '612888', + 'ext': 'mp4', + 'title': 'I maestri del cinema Federico Felini', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/03/1599146747305_i-maestri-del-cinema-federico-felini_thumbnail_1.jpg', + } + } + _TOKEN = 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd' diff --git a/youtube_dlc/extractor/tvland.py b/youtube_dlc/extractor/tvland.py index 791144128b..225b6b078c 100644 --- a/youtube_dlc/extractor/tvland.py +++ b/youtube_dlc/extractor/tvland.py @@ -3,6 +3,8 @@ from .spike import ParamountNetworkIE +# TODO: Remove - Reason not used anymore - Service moved to youtube + class TVLandIE(ParamountNetworkIE): IE_NAME = 'tvland.com' diff --git a/youtube_dlc/extractor/urplay.py b/youtube_dlc/extractor/urplay.py index 6030b7cb5c..4bc2b78fb5 100644 --- a/youtube_dlc/extractor/urplay.py +++ b/youtube_dlc/extractor/urplay.py @@ -3,6 +3,7 @@ from .common import InfoExtractor from ..utils import unified_timestamp +import re class URPlayIE(InfoExtractor): @@ -13,10 +14,10 @@ class URPlayIE(InfoExtractor): 'info_dict': { 'id': '203704', 'ext': 'mp4', - 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd', + 'title': 'Om vetenskap, kritiskt tänkande och motstånd', 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a', - 'timestamp': 1513512768, - 'upload_date': '20171217', + 'timestamp': 1513292400, + 'upload_date': '20171214', }, }, { 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde', @@ -37,35 +38,41 @@ def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - urplayer_data = self._parse_json(self._search_regex( - r'urPlayer\.init\(({.+?})\);', webpage, 'urplayer data'), video_id) + urplayer_data = re.sub(""", "\"", self._search_regex( + r'components\/Player\/Player\" data-react-props=\"({.+?})\"', + webpage, 'urplayer data')) + urplayer_data = self._parse_json(urplayer_data, video_id) + for i in range(len(urplayer_data['accessibleEpisodes'])): + if urplayer_data.get('accessibleEpisodes', {})[i].get('id') == int(video_id): + urplayer_data = urplayer_data['accessibleEpisodes'][i] + break + host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect'] - formats = [] - for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)): - file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr) - if file_http: + urplayer_streams = urplayer_data.get("streamingInfo") + for quality in ('sd'), ('hd'): + location = (urplayer_streams.get("raw", {}).get(quality, {}).get("location") + or urplayer_streams.get("sweComplete", {}).get(quality, {}).get("location")) + if location: formats.extend(self._extract_wowza_formats( - 'http://%s/%splaylist.m3u8' % (host, file_http), video_id, skip_protocols=['rtmp', 'rtsp'])) + 'http://%s/%s/playlist.m3u8' % (host, location), video_id, + skip_protocols=['f4m', 'rtmp', 'rtsp'])) self._sort_formats(formats) - subtitles = {} - for subtitle in urplayer_data.get('subtitles', []): - subtitle_url = subtitle.get('file') - kind = subtitle.get('kind') - if not subtitle_url or (kind and kind != 'captions'): - continue - subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({ - 'url': subtitle_url, + subs = urplayer_streams.get("sweComplete", {}).get("tt", {}).get("location") + if subs: + subtitles.setdefault('Svenska', []).append({ + 'url': subs, }) return { 'id': video_id, 'title': urplayer_data['title'], 'description': self._og_search_description(webpage), - 'thumbnail': urplayer_data.get('image'), - 'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), webpage, 'timestamp')), - 'series': urplayer_data.get('series_title'), + 'thumbnail': urplayer_data.get('image', {}).get('1280x720'), + 'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), + webpage, 'timestamp')), + 'series': urplayer_data.get('seriesTitle'), 'subtitles': subtitles, 'formats': formats, } diff --git a/youtube_dlc/extractor/xtube.py b/youtube_dlc/extractor/xtube.py index 01b253dcb1..081c5e2e73 100644 --- a/youtube_dlc/extractor/xtube.py +++ b/youtube_dlc/extractor/xtube.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, js_to_json, orderedSet, @@ -33,27 +34,11 @@ class XTubeIE(InfoExtractor): 'title': 'strange erotica', 'description': 'contains:an ET kind of thing', 'uploader': 'greenshowers', - 'duration': 450, + 'duration': 449, 'view_count': int, 'comment_count': int, 'age_limit': 18, } - }, { - # FLV videos with duplicated formats - 'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752', - 'md5': 'a406963eb349dd43692ec54631efd88b', - 'info_dict': { - 'id': '9299752', - 'display_id': 'A-Super-Run-Part-1-YT', - 'ext': 'flv', - 'title': 'A Super Run - Part 1 (YT)', - 'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616', - 'uploader': 'tshirtguy59', - 'duration': 579, - 'view_count': int, - 'comment_count': int, - 'age_limit': 18, - }, }, { # new URL schema 'url': 'http://www.xtube.com/video-watch/strange-erotica-625837', @@ -89,16 +74,24 @@ def _real_extract(self, url): title, thumbnail, duration = [None] * 3 - config = self._parse_json(self._search_regex( - r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config', - default='{}'), video_id, transform_source=js_to_json, fatal=False) - if config: - config = config.get('mainRoll') - if isinstance(config, dict): - title = config.get('title') - thumbnail = config.get('poster') - duration = int_or_none(config.get('duration')) - sources = config.get('sources') or config.get('format') + json_config_string = self._search_regex( + r'playerConf=({.+?}),loaderConf', + webpage, 'config', default=None) + if not json_config_string: + raise ExtractorError("Could not extract video player data") + + json_config_string = json_config_string.replace("!0", "true").replace("!1", "false") + + config = self._parse_json(json_config_string, video_id, transform_source=js_to_json, fatal=False) + if not config: + raise ExtractorError("Could not extract video player data") + + config = config.get('mainRoll') + if isinstance(config, dict): + title = config.get('title') + thumbnail = config.get('poster') + duration = int_or_none(config.get('duration')) + sources = config.get('sources') or config.get('format') if not isinstance(sources, dict): sources = self._parse_json(self._search_regex( diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index 4fb49b864c..d605f1e740 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -1406,6 +1406,44 @@ def _get_yt_initial_data(self, video_id, webpage): return self._parse_json( uppercase_escape(config), video_id, fatal=False) + def _get_music_metadata_from_yt_initial(self, yt_initial): + music_metadata = [] + key_map = { + 'Album': 'album', + 'Artist': 'artist', + 'Song': 'track' + } + contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents']) + if type(contents) is list: + for content in contents: + music_track = {} + if type(content) is not dict: + continue + videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer']) + if type(videoSecondaryInfoRenderer) is not dict: + continue + rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows']) + if type(rows) is not list: + continue + for row in rows: + metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer']) + if type(metadataRowRenderer) is not dict: + continue + key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText']) + value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \ + try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text']) + if type(key) is not str or type(value) is not str: + continue + if key in key_map: + if key_map[key] in music_track: + # we've started on a new track + music_metadata.append(music_track) + music_track = {} + music_track[key_map[key]] = value + if len(music_track.keys()): + music_metadata.append(music_track) + return music_metadata + def _get_automatic_captions(self, video_id, webpage): """We need the webpage for getting the captions url, pass it as an argument to speed up the process.""" @@ -2051,7 +2089,7 @@ def _extract_filesize(media_url): if cipher: if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True): - ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")' + ASSETS_RE = r'(?:"assets":.+?"js":\s*("[^"]+"))|(?:"jsUrl":\s*("[^"]+"))' jsplayer_url_json = self._search_regex( ASSETS_RE, embed_webpage if age_gate else video_webpage, @@ -2328,6 +2366,14 @@ def extract_meta(field): if release_year: release_year = int(release_year) + yt_initial = self._get_yt_initial_data(video_id, video_webpage) + if yt_initial: + music_metadata = self._get_music_metadata_from_yt_initial(yt_initial) + if len(music_metadata): + album = music_metadata[0].get('album') + artist = music_metadata[0].get('artist') + track = music_metadata[0].get('track') + m_episode = re.search( r']+id="watch7-headline"[^>]*>\s*]*>.*?>(?P[^<]+)\s*S(?P\d+)\s*•\s*E(?P\d+)', video_webpage) diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py index 1d7a7fed2b..66b45220c7 100644 --- a/youtube_dlc/options.py +++ b/youtube_dlc/options.py @@ -582,7 +582,7 @@ def _comma_separated_values_options_callback(option, opt_str, value, parser): 'along with --min-sleep-interval.')) workarounds.add_option( '--sleep-subtitles', - dest='sleep_interval_subtitles', action='store_true', default=False, + dest='sleep_interval_subtitles', action='store_true', default=0, help='Enforce sleep interval on subtitles as well') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') diff --git a/youtube_dlc/update.py b/youtube_dlc/update.py index e49e09c17c..b358e902b2 100644 --- a/youtube_dlc/update.py +++ b/youtube_dlc/update.py @@ -37,10 +37,26 @@ def update_self(to_screen, verbose, opener): JSON_URL = UPDATE_URL + 'versions.json' UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537) + def sha256sum(): + h = hashlib.sha256() + b = bytearray(128 * 1024) + mv = memoryview(b) + with open(os.path.realpath(sys.executable), 'rb', buffering=0) as f: + for n in iter(lambda: f.readinto(mv), 0): + h.update(mv[:n]) + return h.hexdigest() + + to_screen('Current Build Hash %s' % sha256sum()) + if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, 'frozen'): to_screen('It looks like you installed youtube-dlc with a package manager, pip, setup.py or a tarball. Please use that to update.') return + # compiled file.exe can find itself by + # to_screen(os.path.basename(sys.executable)) + # and path to py or exe + # to_screen(os.path.realpath(sys.executable)) + # Check if there is a new version try: newversion = opener.open(VERSION_URL).read().decode('utf-8').strip() @@ -48,6 +64,7 @@ def update_self(to_screen, verbose, opener): if verbose: to_screen(encode_compat_str(traceback.format_exc())) to_screen('ERROR: can\'t find the current version. Please try again later.') + to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest') return if newversion == __version__: to_screen('youtube-dlc is up-to-date (' + __version__ + ')') @@ -61,6 +78,7 @@ def update_self(to_screen, verbose, opener): if verbose: to_screen(encode_compat_str(traceback.format_exc())) to_screen('ERROR: can\'t obtain versions info. Please try again later.') + to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest') return if 'signature' not in versions_info: to_screen('ERROR: the versions file is not signed or corrupted. Aborting.') @@ -109,6 +127,7 @@ def version_tuple(version_str): if verbose: to_screen(encode_compat_str(traceback.format_exc())) to_screen('ERROR: unable to download latest version') + to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest') return newcontent_hash = hashlib.sha256(newcontent).hexdigest() @@ -155,6 +174,7 @@ def version_tuple(version_str): if verbose: to_screen(encode_compat_str(traceback.format_exc())) to_screen('ERROR: unable to download latest version') + to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest') return newcontent_hash = hashlib.sha256(newcontent).hexdigest() diff --git a/youtube_dlc/utils.py b/youtube_dlc/utils.py index 54a4ea2aac..f5dc1bdafc 100644 --- a/youtube_dlc/utils.py +++ b/youtube_dlc/utils.py @@ -2320,8 +2320,8 @@ def bug_reports_message(): if ytdl_is_updateable(): update_cmd = 'type youtube-dlc -U to update' else: - update_cmd = 'see https://yt-dl.org/update on how to update' - msg = '; please report this issue on https://yt-dl.org/bug .' + update_cmd = 'see https://github.com/blackjack4494/yt-dlc on how to update' + msg = '; please report this issue on https://github.com/blackjack4494/yt-dlc .' msg += ' Make sure you are using the latest version; %s.' % update_cmd msg += ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.' return msg