From 09c127ff838505de1bddde56ad4d22f46ebf6ed7 Mon Sep 17 00:00:00 2001
From: Sergey
Date: Wed, 5 Oct 2022 20:54:41 -0700
Subject: [PATCH] [extractor/Tnaflix] Fix for HTTP 500 (#5150)

Closes #5107

Authored by: SG5
---
 yt_dlp/extractor/tnaflix.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/tnaflix.py b/yt_dlp/extractor/tnaflix.py
index 34361e515..8cbfeb7fb 100644
--- a/yt_dlp/extractor/tnaflix.py
+++ b/yt_dlp/extractor/tnaflix.py
@@ -19,6 +19,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
         r'config\s*=\s*(["\'])(?P<url>(?:https?:)?//(?:(?!\1).)+)\1',
     ]
     _HOST = 'tna'
+    _VIDEO_XML_URL = 'https://www.tnaflix.com/cdn/cdn.php?file={}.fid&key={}&VID={}&nomp4=1&catID=0&rollover=1&startThumb=12&embed=0&utm_source=0&multiview=0&premium=1&country=0user=0&vip=1&cd=0&ref=0&alpha'
     _VKEY_SUFFIX = ''
     _TITLE_REGEX = r'<input[^>]+name="title" value="([^"]+)"'
     _DESCRIPTION_REGEX = r'<input[^>]+name="description" value="([^"]+)"'
@@ -71,6 +72,10 @@ def get_child(elem, names):
     def _real_extract(self, url):
         mobj = self._match_valid_url(url)
         video_id = mobj.group('id')
+
+        def extract_field(pattern, name):
+            return self._html_search_regex(pattern, webpage, name, default=None) if pattern else None
+
         for display_id_key in ('display_id', 'display_id_2'):
             if display_id_key in mobj.groupdict():
                 display_id = mobj.group(display_id_key)
@@ -85,6 +90,13 @@ def _real_extract(self, url):
             self._CONFIG_REGEX, webpage, 'flashvars.config', default=None,
             group='url'), 'http:')
 
+        if not cfg_url:
+            vkey = extract_field(r'<input\b[^>]+\bid="vkey"\b[^>]+\bvalue="([^"]+)"', 'vkey')
+            nkey = extract_field(r'<input\b[^>]+\bid="nkey"\b[^>]+\bvalue="([^"]+)"', 'nkey')
+            vid = extract_field(r'<input\b[^>]+\bid="VID"\b[^>]+\bvalue="([^"]+)"', 'vid')
+            if vkey and nkey and vid:
+                cfg_url = self._proto_relative_url(self._VIDEO_XML_URL.format(vkey, nkey, vid), 'http:')
+
         if not cfg_url:
             inputs = self._hidden_inputs(webpage)
             cfg_url = ('https://cdn-fck.%sflix.com/%sflix/%s%s.fid?key=%s&VID=%s&premium=1&vip=1&alpha'
@@ -139,9 +151,6 @@ def extract_video_url(vl):
         duration = parse_duration(self._html_search_meta(
             'duration', webpage, 'duration', default=None))
 
-        def extract_field(pattern, name):
-            return self._html_search_regex(pattern, webpage, name, default=None) if pattern else None
-
         description = extract_field(self._DESCRIPTION_REGEX, 'description')
         uploader = extract_field(self._UPLOADER_REGEX, 'uploader')
         view_count = str_to_int(extract_field(self._VIEW_COUNT_REGEX, 'view count'))