From 3ba8de62d61d782256f5c1e9939a0762039657de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Finn=20R=2E=20G=C3=A4rtner?= <65015656+FinnRG@users.noreply.github.com> Date: Sun, 26 May 2024 01:40:35 +0200 Subject: [PATCH] [ie/Piapro] Fix extractor (#9311) Closes #9884 Authored by: FinnRG, seproDev --- yt_dlp/extractor/piapro.py | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py index 3ae985da2..87d912d56 100644 --- a/yt_dlp/extractor/piapro.py +++ b/yt_dlp/extractor/piapro.py @@ -2,6 +2,8 @@ from ..compat import compat_urlparse from ..utils import ( ExtractorError, + clean_html, + get_element_by_class, parse_duration, parse_filesize, str_to_int, @@ -88,34 +90,22 @@ def _real_extract(self, url): if category_id not in ('1', '2', '21', '22', '23', '24', '25'): raise ExtractorError('The URL does not contain audio.', expected=True) - str_duration, str_filesize = self._search_regex( - r'サイズ:(.+?)/\(([0-9,]+?[KMG]?B))', webpage, 'duration and size', - group=(1, 2), default=(None, None)) - str_viewcount = self._search_regex(r'閲覧数:([0-9,]+)\s+', webpage, 'view count', fatal=False) - - uploader_id, uploader = self._search_regex( - r'([^<]+)さん<', webpage, 'uploader', - group=(1, 2), default=(None, None)) - content_id = self._search_regex(r'contentId\:\'(.+)\'', webpage, 'content ID') - create_date = self._search_regex(r'createDate\:\'(.+)\'', webpage, 'timestamp') - - player_webpage = self._download_webpage( - f'https://piapro.jp/html5_player_popup/?id={content_id}&cdate={create_date}', - video_id, note='Downloading player webpage') + def extract_info(name, description): + return self._search_regex(rf'{name}[::]\s*([\d\s,:/]+)\s*

', webpage, description, default=None) return { 'id': video_id, - 'title': self._html_search_regex(r'(.+?)', webpage, 'title', fatal=False), - 'description': self._html_search_regex(r'(?s)(.+?)

\s*