From 6089ff40e7cc7710e399db1be87fea103a190ee6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 9 Jan 2019 00:37:01 +0700 Subject: [PATCH] [youporn] Fix title and description extraction (closes #18748) --- youtube_dl/extractor/youporn.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index ea0bce784..d4eccb4b2 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -68,11 +68,9 @@ def _real_extract(self, url): request.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(request, display_id) - title = self._search_regex( - [r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>(?:(?!\1).)+)\1', - r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<'], - webpage, 'title', group='title', - default=None) or self._og_search_title( + title = self._html_search_regex( + r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>', + webpage, 'title', default=None) or self._og_search_title( webpage, default=None) or self._html_search_meta( 'title', webpage, fatal=True) @@ -134,7 +132,11 @@ def _real_extract(self, url): formats.append(f) self._sort_formats(formats) - description = self._og_search_description(webpage, default=None) + description = self._html_search_regex( + r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>', + webpage, 'description', + default=None) or self._og_search_description( + webpage, default=None) thumbnail = self._search_regex( r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1', webpage, 'thumbnail', fatal=False, group='thumbnail')