mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-21 20:46:36 -05:00
[LinkedInLearning] Add subtitles (#1077)
Authored by: Ashish0804 Closes #1072
This commit is contained in:
parent
e99b2d2771
commit
8dc831f715
1 changed file with 24 additions and 1 deletion
|
@ -1,6 +1,7 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from itertools import zip_longest
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
@ -8,6 +9,8 @@
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
srt_subtitles_timecode,
|
||||||
|
try_get,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
@ -86,6 +89,16 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def json2srt(self, transcript_lines, duration=None):
    """Convert a list of transcript line dicts into SRT subtitle text.

    Each entry of ``transcript_lines`` must provide ``transcriptStartAt``
    (divided by 1000 here, so presumably milliseconds -- confirm against
    the API) and ``caption``.

    A cue ends where the next cue starts.  The last cue ends at
    ``duration`` when a truthy value is given, otherwise at its own
    start time plus 1.

    Returns the SRT document as a string ('' for an empty transcript).
    """
    cues = []
    # Pair every line with its successor; the final line is paired with None.
    paired_lines = zip_longest(transcript_lines, transcript_lines[1:])
    for index, (line_dict, next_dict) in enumerate(paired_lines, 1):
        start_time = line_dict['transcriptStartAt'] / 1000
        if next_dict:
            end_time = next_dict['transcriptStartAt'] / 1000
        else:
            end_time = duration or start_time + 1
        # SRT cues must be separated by an empty line; without the trailing
        # blank line players may merge the next cue into this cue's text.
        cues.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(start_time),
            srt_subtitles_timecode(end_time),
            line_dict['caption']))
    return ''.join(cues)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
course_slug, video_slug = self._match_valid_url(url).groups()
|
course_slug, video_slug = self._match_valid_url(url).groups()
|
||||||
|
|
||||||
|
@ -101,6 +114,7 @@ def _real_extract(self, url):
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': 'progressive-%dp' % height,
|
'format_id': 'progressive-%dp' % height,
|
||||||
'url': progressive_url,
|
'url': progressive_url,
|
||||||
|
'ext': 'mp4',
|
||||||
'height': height,
|
'height': height,
|
||||||
'width': width,
|
'width': width,
|
||||||
'source_preference': 1,
|
'source_preference': 1,
|
||||||
|
@ -128,6 +142,14 @@ def _real_extract(self, url):
|
||||||
# However, unless someone can confirm this, the old
|
# However, unless someone can confirm this, the old
|
||||||
# behaviour is being kept as-is
|
# behaviour is being kept as-is
|
||||||
self._sort_formats(formats, ('res', 'source_preference'))
|
self._sort_formats(formats, ('res', 'source_preference'))
|
||||||
|
subtitles = {}
|
||||||
|
duration = int_or_none(video_data.get('durationInSeconds'))
|
||||||
|
transcript_lines = try_get(video_data, lambda x: x['transcript']['lines'], expected_type=list)
|
||||||
|
if transcript_lines:
|
||||||
|
subtitles['en'] = [{
|
||||||
|
'ext': 'srt',
|
||||||
|
'data': self.json2srt(transcript_lines, duration)
|
||||||
|
}]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': self._get_video_id(video_data, course_slug, video_slug),
|
'id': self._get_video_id(video_data, course_slug, video_slug),
|
||||||
|
@ -135,7 +157,8 @@ def _real_extract(self, url):
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': video_data.get('defaultThumbnail'),
|
'thumbnail': video_data.get('defaultThumbnail'),
|
||||||
'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
|
'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
|
||||||
'duration': int_or_none(video_data.get('durationInSeconds')),
|
'duration': duration,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue