mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-21 20:46:36 -05:00
[SponsorBlock] Improve merge algorithm (#999)
Authored by: nihil-admirari
This commit is contained in:
parent
7738bd3272
commit
c6af2dd8e5
2 changed files with 72 additions and 37 deletions
|
@ -461,11 +461,23 @@ def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromCutsAreIgnored(
|
||||||
self._remove_marked_arrange_sponsors_test_impl(
|
self._remove_marked_arrange_sponsors_test_impl(
|
||||||
chapters, self._chapters([2, 2.5], ['c1', 'c3']), cuts)
|
chapters, self._chapters([2, 2.5], ['c1', 'c3']), cuts)
|
||||||
|
|
||||||
|
def test_remove_marked_arrange_sponsors_SingleTinyChapterIsPreserved(self):
|
||||||
|
cuts = [self._chapter(0.5, 2, remove=True)]
|
||||||
|
chapters = self._chapters([2], ['c']) + cuts
|
||||||
|
self._remove_marked_arrange_sponsors_test_impl(
|
||||||
|
chapters, self._chapters([0.5], ['c']), cuts)
|
||||||
|
|
||||||
|
def test_remove_marked_arrange_sponsors_TinyChapterAtTheStartPrependedToTheNext(self):
|
||||||
|
cuts = [self._chapter(0.5, 2, remove=True)]
|
||||||
|
chapters = self._chapters([2, 4], ['c1', 'c2']) + cuts
|
||||||
|
self._remove_marked_arrange_sponsors_test_impl(
|
||||||
|
chapters, self._chapters([2.5], ['c2']), cuts)
|
||||||
|
|
||||||
def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromSponsorOverlapAreIgnored(self):
|
def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromSponsorOverlapAreIgnored(self):
|
||||||
chapters = self._chapters([1, 3, 4], ['c1', 'c2', 'c3']) + [
|
chapters = self._chapters([1, 3, 4], ['c1', 'c2', 'c3']) + [
|
||||||
self._sponsor_chapter(1.5, 2.5, 'sponsor')]
|
self._sponsor_chapter(1.5, 2.5, 'sponsor')]
|
||||||
self._remove_marked_arrange_sponsors_test_impl(
|
self._remove_marked_arrange_sponsors_test_impl(
|
||||||
chapters, self._chapters([1.5, 3, 4], ['c1', '[SponsorBlock]: Sponsor', 'c3']), [])
|
chapters, self._chapters([1.5, 2.5, 4], ['c1', '[SponsorBlock]: Sponsor', 'c3']), [])
|
||||||
|
|
||||||
def test_remove_marked_arrange_sponsors_TinySponsorsOverlapsAreIgnored(self):
|
def test_remove_marked_arrange_sponsors_TinySponsorsOverlapsAreIgnored(self):
|
||||||
chapters = self._chapters([2, 3, 5], ['c1', 'c2', 'c3']) + [
|
chapters = self._chapters([2, 3, 5], ['c1', 'c2', 'c3']) + [
|
||||||
|
@ -476,6 +488,26 @@ def test_remove_marked_arrange_sponsors_TinySponsorsOverlapsAreIgnored(self):
|
||||||
chapters, self._chapters([1, 3, 4, 5], [
|
chapters, self._chapters([1, 3, 4, 5], [
|
||||||
'c1', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', 'c3']), [])
|
'c1', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', 'c3']), [])
|
||||||
|
|
||||||
|
def test_remove_marked_arrange_sponsors_TinySponsorsPrependedToTheNextSponsor(self):
|
||||||
|
chapters = self._chapters([4], ['c']) + [
|
||||||
|
self._sponsor_chapter(1.5, 2, 'sponsor'),
|
||||||
|
self._sponsor_chapter(2, 4, 'selfpromo')
|
||||||
|
]
|
||||||
|
self._remove_marked_arrange_sponsors_test_impl(
|
||||||
|
chapters, self._chapters([1.5, 4], ['c', '[SponsorBlock]: Unpaid/Self Promotion']), [])
|
||||||
|
|
||||||
|
def test_remove_marked_arrange_sponsors_SmallestSponsorInTheOverlapGetsNamed(self):
|
||||||
|
self._pp._sponsorblock_chapter_title = '[SponsorBlock]: %(name)s'
|
||||||
|
chapters = self._chapters([10], ['c']) + [
|
||||||
|
self._sponsor_chapter(2, 8, 'sponsor'),
|
||||||
|
self._sponsor_chapter(4, 6, 'selfpromo')
|
||||||
|
]
|
||||||
|
self._remove_marked_arrange_sponsors_test_impl(
|
||||||
|
chapters, self._chapters([2, 4, 6, 8, 10], [
|
||||||
|
'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion',
|
||||||
|
'[SponsorBlock]: Sponsor', 'c'
|
||||||
|
]), [])
|
||||||
|
|
||||||
def test_make_concat_opts_CommonCase(self):
|
def test_make_concat_opts_CommonCase(self):
|
||||||
sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')]
|
sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')]
|
||||||
expected = '''ffconcat version 1.0
|
expected = '''ffconcat version 1.0
|
||||||
|
|
|
@ -15,7 +15,7 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
_TINY_SPONSOR_OVERLAP_DURATION = 1
|
_TINY_CHAPTER_DURATION = 1
|
||||||
DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l'
|
DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l'
|
||||||
|
|
||||||
|
|
||||||
|
@ -50,7 +50,6 @@ def run(self, info):
|
||||||
if not info.get('__real_download'):
|
if not info.get('__real_download'):
|
||||||
raise PostProcessingError('Cannot cut video since the real and expected durations mismatch. '
|
raise PostProcessingError('Cannot cut video since the real and expected durations mismatch. '
|
||||||
'Different chapters may have already been removed')
|
'Different chapters may have already been removed')
|
||||||
return [], info
|
|
||||||
else:
|
else:
|
||||||
self.write_debug('Expected and actual durations mismatch')
|
self.write_debug('Expected and actual durations mismatch')
|
||||||
|
|
||||||
|
@ -145,37 +144,14 @@ def excess_duration(c):
|
||||||
|
|
||||||
new_chapters = []
|
new_chapters = []
|
||||||
|
|
||||||
def chapter_length(c):
|
|
||||||
return c['end_time'] - c['start_time']
|
|
||||||
|
|
||||||
def original_uncut_chapter(c):
|
|
||||||
return '_was_cut' not in c and '_categories' not in c
|
|
||||||
|
|
||||||
def append_chapter(c):
|
def append_chapter(c):
|
||||||
assert 'remove' not in c
|
assert 'remove' not in c
|
||||||
length = chapter_length(c) - excess_duration(c)
|
length = c['end_time'] - c['start_time'] - excess_duration(c)
|
||||||
# Chapter is completely covered by cuts or sponsors.
|
# Chapter is completely covered by cuts or sponsors.
|
||||||
if length <= 0:
|
if length <= 0:
|
||||||
return
|
return
|
||||||
start = new_chapters[-1]['end_time'] if new_chapters else 0
|
start = new_chapters[-1]['end_time'] if new_chapters else 0
|
||||||
c.update(start_time=start, end_time=start + length)
|
c.update(start_time=start, end_time=start + length)
|
||||||
# Append without checking for tininess to prevent having
|
|
||||||
# a completely empty chapter list.
|
|
||||||
if not new_chapters:
|
|
||||||
new_chapters.append(c)
|
|
||||||
return
|
|
||||||
old_c = new_chapters[-1]
|
|
||||||
# Merge with the previous if the chapter is tiny.
|
|
||||||
# Only tiny chapters resulting from a cut can be skipped.
|
|
||||||
# Chapters that were already tiny in the original list will be preserved.
|
|
||||||
if not original_uncut_chapter(c) and length < _TINY_SPONSOR_OVERLAP_DURATION:
|
|
||||||
old_c['end_time'] = c['end_time']
|
|
||||||
# Previous tiny chapter was appended for the sake of preventing an empty chapter list.
|
|
||||||
# Replace it with the current one.
|
|
||||||
elif not original_uncut_chapter(old_c) and chapter_length(old_c) < _TINY_SPONSOR_OVERLAP_DURATION:
|
|
||||||
c['start_time'] = old_c['start_time']
|
|
||||||
new_chapters[-1] = c
|
|
||||||
else:
|
|
||||||
new_chapters.append(c)
|
new_chapters.append(c)
|
||||||
|
|
||||||
# Turn into a priority queue, index is a tie breaker.
|
# Turn into a priority queue, index is a tie breaker.
|
||||||
|
@ -275,10 +251,36 @@ def append_chapter(c):
|
||||||
append_chapter(cur_chapter)
|
append_chapter(cur_chapter)
|
||||||
cur_i, cur_chapter = i, c
|
cur_i, cur_chapter = i, c
|
||||||
(append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter)
|
(append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter)
|
||||||
|
return self._remove_tiny_rename_sponsors(new_chapters), cuts
|
||||||
|
|
||||||
|
def _remove_tiny_rename_sponsors(self, chapters):
|
||||||
|
new_chapters = []
|
||||||
|
for i, c in enumerate(chapters):
|
||||||
|
# Merge with the previous/next if the chapter is tiny.
|
||||||
|
# Only tiny chapters resulting from a cut can be skipped.
|
||||||
|
# Chapters that were already tiny in the original list will be preserved.
|
||||||
|
if (('_was_cut' in c or '_categories' in c)
|
||||||
|
and c['end_time'] - c['start_time'] < _TINY_CHAPTER_DURATION):
|
||||||
|
if not new_chapters:
|
||||||
|
# Prepend tiny chapter to the next one if possible.
|
||||||
|
if i < len(chapters) - 1:
|
||||||
|
chapters[i + 1]['start_time'] = c['start_time']
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
old_c = new_chapters[-1]
|
||||||
|
if i < len(chapters) - 1:
|
||||||
|
next_c = chapters[i + 1]
|
||||||
|
# Not a typo: key names in old_c and next_c are really different.
|
||||||
|
prev_is_sponsor = 'categories' in old_c
|
||||||
|
next_is_sponsor = '_categories' in next_c
|
||||||
|
# Preferentially prepend tiny normals to normals and sponsors to sponsors.
|
||||||
|
if (('_categories' not in c and prev_is_sponsor and not next_is_sponsor)
|
||||||
|
or ('_categories' in c and not prev_is_sponsor and next_is_sponsor)):
|
||||||
|
next_c['start_time'] = c['start_time']
|
||||||
|
continue
|
||||||
|
old_c['end_time'] = c['end_time']
|
||||||
|
continue
|
||||||
|
|
||||||
i = -1
|
|
||||||
for c in new_chapters.copy():
|
|
||||||
i += 1
|
|
||||||
c.pop('_was_cut', None)
|
c.pop('_was_cut', None)
|
||||||
cats = c.pop('_categories', None)
|
cats = c.pop('_categories', None)
|
||||||
if cats:
|
if cats:
|
||||||
|
@ -292,12 +294,13 @@ def append_chapter(c):
|
||||||
})
|
})
|
||||||
outtmpl, tmpl_dict = self._downloader.prepare_outtmpl(self._sponsorblock_chapter_title, c)
|
outtmpl, tmpl_dict = self._downloader.prepare_outtmpl(self._sponsorblock_chapter_title, c)
|
||||||
c['title'] = self._downloader.escape_outtmpl(outtmpl) % tmpl_dict
|
c['title'] = self._downloader.escape_outtmpl(outtmpl) % tmpl_dict
|
||||||
if i > 0 and c['title'] == new_chapters[i - 1]['title']:
|
# Merge identically named sponsors.
|
||||||
new_chapters[i - 1]['end_time'] = c['end_time']
|
if (new_chapters and 'categories' in new_chapters[-1]
|
||||||
new_chapters.pop(i)
|
and new_chapters[-1]['title'] == c['title']):
|
||||||
i -= 1
|
new_chapters[-1]['end_time'] = c['end_time']
|
||||||
|
continue
|
||||||
return new_chapters, cuts
|
new_chapters.append(c)
|
||||||
|
return new_chapters
|
||||||
|
|
||||||
def remove_chapters(self, filename, ranges_to_cut, concat_opts, force_keyframes=False):
|
def remove_chapters(self, filename, ranges_to_cut, concat_opts, force_keyframes=False):
|
||||||
in_file = filename
|
in_file = filename
|
||||||
|
|
Loading…
Reference in a new issue