[telebruxelles] Fix extraction (Closes #9142)

This commit is contained in:
Sergey M․ 2016-04-11 00:06:05 +06:00
parent 5899e988d5
commit 452908b257
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@@ -1,11 +1,13 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
class TeleBruxellesIE(InfoExtractor): class TeleBruxellesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?telebruxelles\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)' _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/', 'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/',
'md5': '59439e568c9ee42fb77588b2096b214f', 'md5': '59439e568c9ee42fb77588b2096b214f',
@@ -39,18 +41,18 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
article_id = self._html_search_regex( article_id = self._html_search_regex(
r"<article id=\"post-(\d+)\"", webpage, 'article ID') r"<article id=\"post-(\d+)\"", webpage, 'article ID', default=None)
title = self._html_search_regex( title = self._html_search_regex(
r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title') r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title')
description = self._og_search_description(webpage) description = self._og_search_description(webpage, default=None)
rtmp_url = self._html_search_regex( rtmp_url = self._html_search_regex(
r"file: \"(rtmp://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}/vod/mp4:\" \+ \"\w+\" \+ \".mp4)\"", r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"',
webpage, 'RTMP url') webpage, 'RTMP url')
rtmp_url = rtmp_url.replace("\" + \"", "") rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
return { return {
'id': article_id, 'id': article_id or display_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
'description': description, 'description': description,