[linuxacademy] Improve regex

TODO: We need a more robust, standardized regex for extracting JS objects from HTML.
This commit is contained in:
pukkandan 2021-03-21 20:59:03 +05:30
parent 9160a0c6a2
commit 037cc66ec8
No known key found for this signature in database
GPG key ID: 0F00D95A001F4698

View file

@ -38,8 +38,8 @@ class LinuxAcademyIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'What Is Data Science', 'title': 'What Is Data Science',
'description': 'md5:c574a3c20607144fb36cb65bdde76c99', 'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
'timestamp': 1607387907, 'timestamp': int, # The timestamp and upload date changes
'upload_date': '20201208', 'upload_date': r're:\d+',
'duration': 304, 'duration': 304,
}, },
'params': { 'params': {
@ -59,6 +59,16 @@ class LinuxAcademyIE(InfoExtractor):
}, },
'playlist_count': 41, 'playlist_count': 41,
'skip': 'Requires Linux Academy account credentials', 'skip': 'Requires Linux Academy account credentials',
}, {
'url': 'https://linuxacademy.com/cp/modules/view/id/39',
'info_dict': {
'id': '39',
'title': 'Red Hat Certified Systems Administrator - RHCSA (EX200) Exam Prep (legacy)',
'description': 'md5:0f1d3369e90c3fb14a79813b863c902f',
'duration': 89280,
},
'playlist_count': 73,
'skip': 'Requires Linux Academy account credentials',
}] }]
_AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize' _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
@ -162,7 +172,7 @@ def _real_extract(self, url):
if course_id: if course_id:
module = self._parse_json( module = self._parse_json(
self._search_regex( self._search_regex(
r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'), r'window\.module\s*=\s*({(?:(?!};)[^"]|"([^"]|\\")*")+})\s*;', webpage, 'module'),
item_id) item_id)
entries = [] entries = []
chapter_number = None chapter_number = None