From 97067db2ae32b18ef573af99b91d36026b4b1d05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 26 May 2017 22:12:24 +0700 Subject: [PATCH] [bbc] Add support for authentication --- youtube_dl/extractor/bbc.py | 40 +++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index dd65b8d86..7cd4446b4 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -6,14 +6,18 @@ from .common import InfoExtractor from ..utils import ( + clean_html, dict_get, ExtractorError, float_or_none, + get_element_by_class, int_or_none, parse_duration, parse_iso8601, try_get, unescapeHTML, + urlencode_postdata, + urljoin, ) from ..compat import ( compat_etree_fromstring, @@ -38,6 +42,9 @@ class BBCCoUkIE(InfoExtractor): (?P%s)(?!/(?:episodes|broadcasts|clips)) ''' % _ID_REGEX + _LOGIN_URL = 'https://account.bbc.com/signin' + _NETRC_MACHINE = 'bbc' + _MEDIASELECTOR_URLS = [ # Provides HQ HLS streams with even better quality that pc mediaset but fails # with geolocation in some cases when it's even not geo restricted at all (e.g. 
# NOTE: everything below belongs to the body of ``class BBCCoUkIE(InfoExtractor)``
# (the class named in the diff hunk header). It is shown dedented here; indent
# it by one level when placing it inside the class.

_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'


def _login(self):
    """Sign in through the ``_LOGIN_URL`` form when credentials are available.

    Returns without doing anything when ``_get_login_info()`` yields no
    username.

    Raises:
        ExtractorError: when, after the form has been posted, the final
            URL still contains ``_LOGIN_URL``. The cleaned text of the
            page's ``form-message`` element is included in the message
            when that element is present and non-empty.
    """
    username, password = self._get_login_info()
    if username is None:
        return

    login_page = self._download_webpage(
        self._LOGIN_URL, None, 'Downloading signin page')

    # Start from whatever ``_hidden_inputs`` extracts from the sign-in page
    # (presumably the form's hidden fields -- confirm against the helper),
    # then add the user's credentials on top.
    login_form = self._hidden_inputs(login_page)

    login_form.update({
        'username': username,
        'password': password,
    })

    # Take the target from the form's ``action`` attribute: group 1 captures
    # the opening quote so that ``\1`` matches the same closing quote, and
    # the named group ``url`` is what ``group='url'`` asks for.  When no
    # action is found the sign-in URL itself is used (``default=``).
    # ``urljoin`` is given ``_LOGIN_URL`` as base, presumably to resolve a
    # relative action -- confirm against the helper.
    post_url = urljoin(self._LOGIN_URL, self._search_regex(
        r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
        'post url', default=self._LOGIN_URL, group='url'))

    response, urlh = self._download_webpage_handle(
        post_url, None, 'Logging in', data=urlencode_postdata(login_form),
        headers={'Referer': self._LOGIN_URL})

    # Still being on the sign-in URL after the POST is treated as a failed
    # login; report the page's own message when one can be extracted.
    if self._LOGIN_URL in urlh.geturl():
        error = clean_html(get_element_by_class('form-message', response))
        if error:
            raise ExtractorError(
                'Unable to login: %s' % error, expected=True)
        raise ExtractorError('Unable to log in')


def _real_initialize(self):
    """Run ``_login()``.

    NOTE(review): presumably an ``InfoExtractor`` initialization hook that
    runs before extraction -- confirm against the base class.
    """
    self._login()


class MediaSelectionError(Exception):
    def __init__(self, id):
        self.id = id