mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-05 16:41:37 +00:00
[generic] Detect Bandcamp pages that use custom domains (closes #1662)
Such pages embed the original bandcamp.com URL in the 'og:url' meta property.
This commit is contained in:
parent
bc63d9d329
commit
c19f7764a5
1 changed file with 17 additions and 1 deletion
|
@ -41,7 +41,17 @@ class GenericIE(InfoExtractor):
|
|||
u"uploader_id": u"skillsmatter",
|
||||
u"uploader": u"Skills Matter",
|
||||
}
|
||||
}
|
||||
},
|
||||
# bandcamp page with custom domain
|
||||
{
|
||||
u'url': u'http://bronyrock.com/track/the-pony-mash',
|
||||
u'file': u'3235767654.mp3',
|
||||
u'info_dict': {
|
||||
u'title': u'The Pony Mash',
|
||||
u'uploader': u'M_Pallante',
|
||||
},
|
||||
u'skip': u'There is a limit of 200 free downloads / month for the test song',
|
||||
},
|
||||
]
|
||||
|
||||
def report_download_webpage(self, video_id):
|
||||
|
@ -155,6 +165,12 @@ def _real_extract(self, url):
|
|||
surl = unescapeHTML(mobj.group(1))
|
||||
return self.url_result(surl, 'Youtube')
|
||||
|
||||
# Look for Bandcamp pages with custom domain
|
||||
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||
if mobj is not None:
|
||||
burl = unescapeHTML(mobj.group(1))
|
||||
return self.url_result(burl, 'Bandcamp')
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if mobj is None:
|
||||
|
|
Loading…
Reference in a new issue