mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-21 20:46:36 -05:00
Fix some IEs that didn't return the upload_date in the YYYYMMDD format
Create a function unified_strdate in utils.py to fix these problems
This commit is contained in:
parent
bd55852517
commit
bf50b0383e
3 changed files with 29 additions and 11 deletions
|
@ -15,6 +15,7 @@
|
|||
from youtube_dl.utils import unescapeHTML
|
||||
from youtube_dl.utils import orderedSet
|
||||
from youtube_dl.utils import DateRange
|
||||
from youtube_dl.utils import unified_strdate
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
_compat_str = lambda b: b.decode('unicode-escape')
|
||||
|
@ -104,6 +105,12 @@ def test_daterange(self):
|
|||
self.assertTrue("19690721" in _ac)
|
||||
_firstmilenium = DateRange(end="10000101")
|
||||
self.assertTrue("07110427" in _firstmilenium)
|
||||
|
||||
def test_unified_dates(self):
    """Check that unified_strdate turns each sample input into YYYYMMDD."""
    # (input string, expected YYYYMMDD result), checked in this order.
    expectations = (
        ('December 21, 2010', '20101221'),
        ('8/7/2009', '20090708'),
        ('Dec 14, 2012', '20121214'),
        ('2012/10/11 01:56:38 +0000', '20121011'),
    )
    for raw, normalised in expectations:
        self.assertEqual(unified_strdate(raw), normalised)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -562,12 +562,7 @@ def _real_extract(self, url):
|
|||
mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
|
||||
if mobj is not None:
|
||||
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
|
||||
format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
|
||||
for expression in format_expressions:
|
||||
try:
|
||||
upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
|
||||
except:
|
||||
pass
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
# description
|
||||
video_description = get_element_by_id("eow-description", video_webpage)
|
||||
|
@ -2385,7 +2380,7 @@ def _real_extract(self, url):
|
|||
shortMediaId = mediaId.split(':')[-1]
|
||||
showId = mediaId.split(':')[-2].replace('.com', '')
|
||||
officialTitle = itemEl.findall('./title')[0].text
|
||||
officialDate = itemEl.findall('./pubDate')[0].text
|
||||
officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)
|
||||
|
||||
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
||||
compat_urllib_parse.urlencode({'uri': mediaId}))
|
||||
|
@ -2695,12 +2690,13 @@ def _real_extract(self, url):
|
|||
|
||||
streams = json.loads(stream_json)
|
||||
mediaURL = streams['http_mp3_128_url']
|
||||
upload_date = unified_strdate(info['created_at'])
|
||||
|
||||
return [{
|
||||
'id': info['id'],
|
||||
'url': mediaURL,
|
||||
'uploader': info['user']['username'],
|
||||
'upload_date': info['created_at'],
|
||||
'upload_date': upload_date,
|
||||
'title': info['title'],
|
||||
'ext': u'mp3',
|
||||
'description': info['description'],
|
||||
|
@ -3759,7 +3755,7 @@ def _real_extract(self, url):
|
|||
self._downloader.report_warning(u'unable to extract video date')
|
||||
upload_date = None
|
||||
else:
|
||||
upload_date = result.group('date').strip()
|
||||
upload_date = unified_strdate(result.group('date').strip())
|
||||
|
||||
# Get the video uploader
|
||||
result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
|
||||
|
@ -3866,7 +3862,7 @@ def _real_extract(self, url):
|
|||
if result is None:
|
||||
self._downloader.report_error(u'unable to extract video title')
|
||||
return
|
||||
upload_date = result.group('date')
|
||||
upload_date = unified_strdate(result.group('date'))
|
||||
|
||||
info = {'id': video_id,
|
||||
'url': video_url,
|
||||
|
|
|
@ -569,7 +569,22 @@ def http_response(self, req, resp):
|
|||
|
||||
https_request = http_request
|
||||
https_response = http_response
|
||||
|
||||
|
||||
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD

    date_str is tried against a fixed list of layouts, for example
    'December 21, 2010', 'Dec 14, 2012', '2012-10-11', '8/7/2009' and
    '2012/10/11 01:56:38 +0000'.  Slash-separated dates that start with
    the day are read as day/month/year.

    Returns None when date_str is None or matches none of the layouts.
    """
    if date_str is None:
        return None
    # Replace commas; the doubled space this can leave behind is still
    # accepted by strptime for a single space in the format.
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2, so a trailing
    # offset such as ' +0000' is dropped before parsing.
    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
    format_expressions = (
        '%d %B %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%Y-%m-%d',
        '%d/%m/%Y',
        '%Y/%m/%d %H:%M:%S',
    )
    for expression in format_expressions:
        try:
            # strftime stays inside the try so a date it cannot format is
            # treated like a non-matching layout, as before.
            return datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            # Layout did not match; try the next one.
            continue
    return None
|
||||
|
||||
def date_from_str(date_str):
    """Parse a string in the format YYYYMMDD and return it as a date object"""
    parsed = datetime.datetime.strptime(date_str, "%Y%m%d")
    return parsed.date()
|
||||
|
|
Loading…
Reference in a new issue