[ruutu] Detect embeds (#3294)

Authored by: tpikonen
This commit is contained in:
Teemu Ikonen 2022-04-05 15:15:47 +03:00 committed by GitHub
parent f4d706a931
commit 0a8a7e68fa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 1 deletions

View file

@ -149,6 +149,7 @@
from .mainstreaming import MainStreamingIE
from .gfycat import GfycatIE
from .panopto import PanoptoBaseIE
from .ruutu import RuutuIE
class GenericIE(InfoExtractor):
@ -2511,7 +2512,24 @@ class GenericIE(InfoExtractor):
'id': 'insert-a-quiz-into-a-panopto-video'
},
'playlist_count': 1
}
},
{
# Ruutu embed
'url': 'https://www.nelonen.fi/ohjelmat/madventures-suomi/2160731-riku-ja-tunna-lahtevat-peurajahtiin-tv-sta-tutun-biologin-kanssa---metsastysreissu-huipentuu-kasvissyojan-painajaiseen',
'md5': 'a2513a98d3496099e6eced40f7e6a14b',
'info_dict': {
'id': '4044426',
'ext': 'mp4',
'title': 'Riku ja Tunna lähtevät peurajahtiin tv:stä tutun biologin kanssa metsästysreissu huipentuu kasvissyöjän painajaiseen!',
'thumbnail': r're:^https?://.+\.jpg$',
'duration': 108,
'series' : 'Madventures Suomi',
'description': 'md5:aa55b44bd06a1e337a6f1d0b46507381',
'categories': ['Matkailu', 'Elämäntyyli'],
'age_limit': 0,
'upload_date': '20220308',
},
},
]
def report_following_redirect(self, new_url):
@ -3737,6 +3755,12 @@ def _real_extract(self, url):
panopto_urls = PanoptoBaseIE._extract_urls(webpage)
if panopto_urls:
return self.playlist_from_matches(panopto_urls, video_id, video_title)
# Look for Ruutu embeds
ruutu_url = RuutuIE._extract_url(webpage)
if ruutu_url:
return self.url_result(ruutu_url, RuutuIE)
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:

View file

@ -1,6 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
@ -8,6 +11,8 @@
ExtractorError,
find_xpath_attr,
int_or_none,
traverse_obj,
try_call,
unified_strdate,
url_or_none,
xpath_attr,
@ -123,6 +128,16 @@ class RuutuIE(InfoExtractor):
]
_API_BASE = 'https://gatling.nelonenmedia.fi'
@classmethod
def _extract_url(cls, webpage):
    """Return the Ruutu video URL embedded in a Drupal page, if any.

    Looks for the ``jQuery.extend(Drupal.settings, {...});`` call in
    *webpage*, parses its JSON argument and reads the video id stored under
    ``mediaCrossbowSettings.file.field_crossbow_video_id.und[0].value``.

    Returns ``http://www.ruutu.fi/video/<id>`` when an id is found and
    ``None`` otherwise (no settings call, unparseable settings, or no id).
    """
    try:
        # try_call is relied on to absorb the "no regex match" case
        # (``None.group``); its exact list of swallowed exceptions lives in
        # ..utils and is not assumed to include JSON decode errors.
        settings = try_call(
            lambda: json.loads(re.search(
                r'jQuery\.extend\(Drupal\.settings, ({.+?})\);', webpage).group(1), strict=False))
    except ValueError:
        # json.JSONDecodeError is a ValueError. The non-greedy capture stops
        # at the first "});", so the captured text may be truncated or
        # otherwise invalid JSON; treat that as "no embed on this page"
        # instead of letting the error escape to the caller.
        settings = None
    video_id = traverse_obj(settings, (
        'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value'))
    if video_id:
        return f'http://www.ruutu.fi/video/{video_id}'
    return None
def _real_extract(self, url):
video_id = self._match_id(url)