diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 62b1da25e..2ba86878b 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -15,6 +15,7 @@
     compat_xml_parse_error,
 
     ExtractorError,
+    float_or_none,
     HEADRequest,
     orderedSet,
     parse_xml,
@@ -305,6 +306,30 @@ class GenericIE(InfoExtractor):
             'params': {
                 'skip_download': True,
             }
+        },
+        # Camtasia studio
+        {
+            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
+            'playlist': [{
+                'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
+                'info_dict': {
+                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
+                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
+                    'ext': 'flv',
+                    'duration': 2235.90,
+                }
+            }, {
+                'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
+                'info_dict': {
+                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
+                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
+                    'ext': 'flv',
+                    'duration': 2235.93,
+                }
+            }],
+            'info_dict': {
+                'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
+            }
         }
     ]
 
@@ -389,6 +414,57 @@ def _extract_rss(self, url, video_id, doc):
             'entries': entries,
         }
 
+    def _extract_camtasia(self, url, video_id, webpage):
+        """Extract a Camtasia Studio playlist referenced by webpage.
+
+        Searches webpage for a fo.addVariable("csConfigFile", ...) call,
+        downloads the XML configuration it names (resolved against url)
+        and builds one playlist entry per fileset child that has a <uri>.
+
+        Returns a playlist dict, or None if no camtasia video can be found.
+        """
+        camtasia_cfg = self._search_regex(
+            r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
+            webpage, 'camtasia configuration file', default=None)
+        if camtasia_cfg is None:
+            return None
+
+        title = self._html_search_meta('DC.title', webpage, fatal=True)
+
+        camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
+        camtasia_cfg = self._download_xml(
+            camtasia_url, video_id,
+            note='Downloading camtasia configuration',
+            errnote='Failed to download camtasia configuration')
+        fileset_node = camtasia_cfg.find('./playlist/array/fileset')
+        if fileset_node is None:
+            # The configuration lists no files, so there is nothing to extract
+            return None
+
+        entries = []
+        # Iterate the element itself: Element.getchildren() is deprecated
+        # and was removed in Python 3.9
+        for n in fileset_node:
+            url_n = n.find('./uri')
+            if url_n is None or not url_n.text:
+                continue
+
+            # A missing <duration> is passed on as None instead of raising
+            duration_n = n.find('./duration')
+            entries.append({
+                'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
+                'title': '%s - %s' % (title, n.tag),
+                'url': compat_urlparse.urljoin(url, url_n.text),
+                'duration': float_or_none(
+                    None if duration_n is None else duration_n.text),
+            })
+
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'title': title,
+        }
+
     def _real_extract(self, url):
         if url.startswith('//'):
             return {
@@ -477,6 +553,11 @@ def _real_extract(self, url):
         except compat_xml_parse_error:
             pass
 
+        # Is it a Camtasia project?
+        camtasia_res = self._extract_camtasia(url, video_id, webpage)
+        if camtasia_res is not None:
+            return camtasia_res
+
         # Sometimes embedded video player is hidden behind percent encoding
         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
         # Unescaping the whole page allows to handle those cases in a generic way