[generic] Refactor _extract_rss

Closes #3738
2024-11-21 20:46:36 -05:00 · 2022-05-18 04:14:13 +05:30 · 2022-05-18 04:14:13 +05:30 · d6bf1161db
commit d6bf1161db
parent 7896214c42
1 changed file with 14 additions and 35 deletions
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -129,6 +129,7 @@
    sanitized_Request,
    smuggle_url,
    str_or_none,
    try_call,
    unescapeHTML,
    unified_timestamp,
    unsmuggle_url,
@@ -2536,66 +2537,44 @@ def report_detected(self, name):
        self._downloader.write_debug(f'Identified a {name}')
    def _extract_rss(self, url, video_id, doc):
        """Build a playlist result from a parsed RSS document.

        Each ``./channel/item`` element of ``doc`` becomes one
        ``url_transparent`` entry. The entry URL is the first non-empty
        ``url`` attribute among the item's ``<enclosure>`` elements, falling
        back to the text of its ``<link>`` element. Items that provide
        neither are skipped.

        Args:
            url: Feed URL; used verbatim as the playlist ``id``.
            video_id: Not used by this method.
            doc: Parsed feed; must support ``find``/``findall`` with the
                paths used below (presumably an ElementTree element --
                confirm against the caller).

        Returns:
            A ``playlist`` dict with ``id``, ``title``, ``description`` and
            ``entries``.
        """
        NS_MAP = {
            'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
        }

        entries = []
        for it in doc.findall('./channel/item'):
            # Take the first enclosure that actually carries a URL. Enclosures
            # without a usable ``url`` attribute must not shadow the <link>
            # fallback, so falsy values are filtered out before ``next``.
            enclosure_urls = (e.attrib.get('url') for e in it.findall('./enclosure'))
            next_url = (
                next(filter(None, enclosure_urls), None)
                or xpath_text(it, 'link', fatal=False))
            if not next_url:
                continue

            # <guid> is optional; try_call is relied upon to yield a falsy
            # value instead of raising when the element is absent.
            guid = try_call(lambda: it.find('guid').text)
            if guid:
                next_url = smuggle_url(next_url, {'force_videoid': guid})

            def itunes(key):
                # Text of the item's <itunes:KEY> child, or None if missing.
                return xpath_text(it, xpath_with_ns(f'./itunes:{key}', NS_MAP), default=None)

            entries.append({
                '_type': 'url_transparent',
                'url': next_url,
                'title': try_call(lambda: it.find('title').text),
                'description': xpath_text(it, 'description', default=None),
                'timestamp': unified_timestamp(xpath_text(it, 'pubDate', default=None)),
                'duration': parse_duration(itunes('duration')),
                'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
                'episode': itunes('title'),
                'episode_number': int_or_none(itunes('episode')),
                'season_number': int_or_none(itunes('season')),
                # <itunes:explicit>: true/yes -> 18, false/no -> 0, anything else -> None
                'age_limit': {'true': 18, 'yes': 18, 'false': 0, 'no': 0}.get((itunes('explicit') or '').lower()),
            })

        return {
            '_type': 'playlist',
            'id': url,
            'title': try_call(lambda: doc.find('./channel/title').text),
            'description': try_call(lambda: doc.find('./channel/description').text),
            'entries': entries,
        }