[huajiao] Add new extractor

This commit is contained in:
Philip Xu 2016-10-13 21:51:26 -04:00
parent 580d411931
commit b7f59a3bf6
2 changed files with 51 additions and 0 deletions

View file

@ -372,6 +372,7 @@
HRTiIE, HRTiIE,
HRTiPlaylistIE, HRTiPlaylistIE,
) )
from .huajiao import HuajiaoIE
from .huffpost import HuffPostIE from .huffpost import HuffPostIE
from .hypem import HypemIE from .hypem import HypemIE
from .iconosquare import IconosquareIE from .iconosquare import IconosquareIE

View file

@ -0,0 +1,50 @@
# coding: utf-8
from __future__ import unicode_literals
from ..utils import parse_duration, parse_iso8601
from .common import InfoExtractor
class HuajiaoIE(InfoExtractor):
    """Extractor for recorded broadcasts at huajiao.com/l/<numeric id>.

    The watch page embeds a JavaScript ``var feed = {...}`` object; all
    metadata except the description is read from that object.
    """

    IE_DESC = '花椒直播'
    _VALID_URL = r'https?://(?:www\.)?huajiao\.com/l/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.huajiao.com/l/38941232',
        'md5': 'd08bf9ac98787d24d1e4c0283f2d372d',
        'info_dict': {
            'id': '38941232',
            'ext': 'mp4',
            'title': '#新人求关注#',
            'description': 're:.*',
            'duration': 2424.0,
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1475866459,
            'upload_date': '20161007',
            'uploader': 'Penny_余姿昀',
            'uploader_id': '75206005',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the broadcast page at *url*.

        The title and the m3u8 URL are looked up with plain indexing, so a
        page lacking either still fails loudly (KeyError).  Every other
        field is looked up leniently and ends up as None when absent.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        feed_json = self._search_regex(
            r'var\s*feed\s*=\s*({.*})', webpage, 'feed json str')
        feed = self._parse_json(feed_json, video_id)

        description = self._html_search_meta(
            'description', webpage, 'description', fatal=False)

        def get(section, field):
            # Tolerate a missing (or null) section as well as a missing
            # field, so optional metadata never aborts the extraction.
            return (feed.get(section) or {}).get(field)

        return {
            'id': video_id,
            'title': feed['feed']['formated_title'],
            'description': description,
            'duration': parse_duration(get('feed', 'duration')),
            'thumbnail': get('feed', 'image'),
            # 'creatime' uses a space between date and time, hence the
            # explicit ' ' delimiter.
            'timestamp': parse_iso8601(feed.get('creatime'), ' '),
            'uploader': get('author', 'nickname'),
            'uploader_id': get('author', 'uid'),
            'formats': self._extract_m3u8_formats(
                feed['feed']['m3u8'], video_id, 'mp4', 'm3u8_native'),
        }