[QQMusic] Add singer info extractor

This commit is contained in:
Yen Chi Hsuan 2015-03-18 14:59:33 +08:00
parent a2043572aa
commit 8afff9f849
2 changed files with 56 additions and 2 deletions

View file

@ -396,7 +396,10 @@
from .prosiebensat1 import ProSiebenSat1IE from .prosiebensat1 import ProSiebenSat1IE
from .puls4 import Puls4IE from .puls4 import Puls4IE
from .pyvideo import PyvideoIE from .pyvideo import PyvideoIE
from .qqmusic import QQMusicIE from .qqmusic import (
QQMusicIE,
QQMusicSingerIE
)
from .quickvid import QuickVidIE from .quickvid import QuickVidIE
from .r7 import R7IE from .r7 import R7IE
from .radiode import RadioDeIE from .radiode import RadioDeIE

View file

@ -3,9 +3,11 @@
import random import random
import time import time
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import strip_jsonp from ..utils import strip_jsonp
from ..compat import compat_urllib_request
class QQMusicIE(InfoExtractor): class QQMusicIE(InfoExtractor):
@ -34,7 +36,7 @@ def _real_extract(self, url):
detail_info_page = self._download_webpage( detail_info_page = self._download_webpage(
'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid, 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
mid, note='Download sont detail info', mid, note='Download song detail info',
errnote='Unable to get song detail info') errnote='Unable to get song detail info')
song_name = self._html_search_regex( song_name = self._html_search_regex(
@ -62,3 +64,52 @@ def _real_extract(self, url):
'upload_date': publish_time, 'upload_date': publish_time,
'creator': singer, 'creator': singer,
} }
class QQMusicSingerIE(InfoExtractor):
    """Turn a QQMusic singer page into a playlist of that singer's songs.

    Each song becomes a ``url_result`` pointing at the song page, handled by
    the 'QQMusic' extractor. The playlist title is the singer name and the
    playlist description is the singer's description, when available.
    """

    # Dots are escaped so they only match a literal '.' in the host name.
    _VALID_URL = r'http://y\.qq\.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
    _TEST = {
        'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
        'info_dict': {
            'id': '001BLpXF2DyJe2',
            'title': '林俊杰',
            'description': 'md5:2a222d89ba4455a3af19940c0481bb78',
        },
        'playlist_count': 12,
    }

    def _real_extract(self, url):
        """Build the playlist result for the singer identified by ``url``.

        Returns the value of ``self.playlist_result`` with the song entries,
        the singer mid as playlist id, the singer name as title (``None`` if
        not found on the page) and the singer description (``None`` if the
        singer id is not found or the XML carries no description node).
        """
        mid = self._match_id(url)

        # The static singer page path is built from the last two characters
        # of the mid followed by the full mid.
        # The second positional argument is the video id; the note comes
        # third. Passing only the note would report it as the id.
        singer_page = self._download_webpage(
            'http://y.qq.com/y/static/singer/%s/%s/%s.html' % (mid[-2], mid[-1], mid),
            mid, 'Download singer page')

        entries = []
        for item in re.findall(r'<span class="data">([^<>]+)</span>', singer_page):
            # The span content is a '|'-separated record; the song mid is
            # taken from the fifth field counting from the end.
            # NOTE(review): assumes every record has at least five fields.
            song_mid = item.split('|')[-5]
            entries.append(self.url_result(
                'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic', song_mid))

        singer_name = self._html_search_regex(
            r"singername\s*:\s*'([^']+)'", singer_page, 'singer name',
            default=None)

        singer_id = self._html_search_regex(
            r"singerid\s*:\s*'([0-9]+)'", singer_page, 'singer id',
            default=None)

        singer_desc = None

        if singer_id:
            req = compat_urllib_request.Request(
                'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
            # NOTE(review): presumably the endpoint requires this Referer;
            # confirm before removing.
            req.add_header(
                'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')
            singer_desc_page = self._download_xml(
                req, mid, 'Download singer description XML')

            # find() returns None when the node is absent. Compare against
            # None explicitly, because an Element without children is falsy.
            desc_node = singer_desc_page.find('./data/info/desc')
            if desc_node is not None:
                singer_desc = desc_node.text

        return self.playlist_result(entries, mid, singer_name, singer_desc)