From 4b4a138eee71cfe6385085bc6795ea731f7e8c85 Mon Sep 17 00:00:00 2001 From: stevenyomi <95685115+stevenyomi@users.noreply.github.com> Date: Sat, 24 Sep 2022 22:29:55 +0800 Subject: [PATCH] Improve chapter recognition (#8050) --- .../util/chapter/ChapterRecognition.kt | 65 +++++++------------ .../util/chapter/ChapterRecognitionTest.kt | 13 +++- 2 files changed, 32 insertions(+), 46 deletions(-) diff --git a/app/src/main/java/eu/kanade/tachiyomi/util/chapter/ChapterRecognition.kt b/app/src/main/java/eu/kanade/tachiyomi/util/chapter/ChapterRecognition.kt index 72cb6b1abe..c1d16eb47e 100644 --- a/app/src/main/java/eu/kanade/tachiyomi/util/chapter/ChapterRecognition.kt +++ b/app/src/main/java/eu/kanade/tachiyomi/util/chapter/ChapterRecognition.kt @@ -4,35 +4,31 @@ package eu.kanade.tachiyomi.util.chapter * -R> = regex conversion. */ object ChapterRecognition { + + private const val NUMBER_PATTERN = """([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?""" + /** * All cases with Ch.xx * Mokushiroku Alice Vol.1 Ch. 4: Misrepresentation -R> 4 */ - private val basic = Regex("""(?<=ch\.) *([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?""") + private val basic = Regex("""(?<=ch\.) *$NUMBER_PATTERN""") /** - * Regex used when only one number occurrence * Example: Bleach 567: Down With Snowwhite -R> 567 */ - private val occurrence = Regex("""([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?""") - - /** - * Regex used when manga title removed - * Example: Solanin 028 Vol. 2 -> 028 Vol.2 -> 028Vol.2 -R> 028 - */ - private val withoutManga = Regex("""^([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?""") + private val number = Regex(NUMBER_PATTERN) /** * Regex used to remove unwanted tags * Example Prison School 12 v.1 vol004 version1243 volume64 -R> Prison School 12 */ - private val unwanted = Regex("""(? One Piece 12special */ - private val unwantedWhiteSpace = Regex("""(\s)(extra|special|omake)""") + private val unwantedWhiteSpace = Regex("""\s(?=extra|special|omake)""") fun parseChapterNumber(mangaTitle: String, chapterName: String, chapterNumber: Float? = null): Float { // If chapter number is known return. @@ -43,40 +39,23 @@ object ChapterRecognition { // Get chapter title with lower case var name = chapterName.lowercase() + // Remove manga title from chapter title. + name = name.replace(mangaTitle.lowercase(), "").trim() + // Remove comma's or hyphens. name = name.replace(',', '.').replace('-', '.') // Remove unwanted white spaces. - unwantedWhiteSpace.findAll(name).let { - it.forEach { occurrence -> name = name.replace(occurrence.value, occurrence.value.trim()) } - } + name = unwantedWhiteSpace.replace(name, "") // Remove unwanted tags. - unwanted.findAll(name).let { - it.forEach { occurrence -> name = name.replace(occurrence.value, "") } - } + name = unwanted.replace(name, "") // Check base case ch.xx - getChapterNumberFromMatch(basic.find(name))?.let { return it } - - // Check one number occurrence. - val occurrences: MutableList = arrayListOf() - occurrence.findAll(name).let { - it.forEach { occurrence -> occurrences.add(occurrence) } - } - - if (occurrences.size == 1) { - getChapterNumberFromMatch(occurrences[0])?.let { return it } - } - - // Remove manga title from chapter title. - val nameWithoutManga = name.replace(mangaTitle.lowercase(), "").trim() - - // Check if first value is number after title remove. - getChapterNumberFromMatch(withoutManga.find(nameWithoutManga))?.let { return it } + basic.find(name)?.let { return getChapterNumberFromMatch(it) } // Take the first number encountered. - getChapterNumberFromMatch(occurrence.find(nameWithoutManga))?.let { return it } + number.find(name)?.let { return getChapterNumberFromMatch(it) } return chapterNumber ?: -1f } @@ -86,8 +65,8 @@ object ChapterRecognition { * @param match result of regex * @return chapter number if found else null */ - private fun getChapterNumberFromMatch(match: MatchResult?): Float? { - return match?.let { + private fun getChapterNumberFromMatch(match: MatchResult): Float { + return match.let { val initial = it.groups[1]?.value?.toFloat()!! val subChapterDecimal = it.groups[2]?.value val subChapterAlpha = it.groups[3]?.value @@ -120,11 +99,9 @@ object ChapterRecognition { return .97f } - return if (alpha[0] == '.') { - // Take value after (.) - parseAlphaPostFix(alpha[1]) - } else { - parseAlphaPostFix(alpha[0]) + val trimmedAlpha = alpha.trimStart('.') + if (trimmedAlpha.length == 1) { + return parseAlphaPostFix(trimmedAlpha[0]) } } @@ -135,6 +112,8 @@ object ChapterRecognition { * x.a -> x.1, x.b -> x.2, etc */ private fun parseAlphaPostFix(alpha: Char): Float { - return ("0." + (alpha.code - 96).toString()).toFloat() + val number = alpha.code - ('a'.code - 1) + if (number >= 10) return 0f + return number / 10f } } diff --git a/app/src/test/java/eu/kanade/tachiyomi/util/chapter/ChapterRecognitionTest.kt b/app/src/test/java/eu/kanade/tachiyomi/util/chapter/ChapterRecognitionTest.kt index a83d7ada00..022a5d461e 100644 --- a/app/src/test/java/eu/kanade/tachiyomi/util/chapter/ChapterRecognitionTest.kt +++ b/app/src/test/java/eu/kanade/tachiyomi/util/chapter/ChapterRecognitionTest.kt @@ -198,7 +198,6 @@ class ChapterRecognitionTest { assertChapter(mangaTitle, "Fairy Tail 404.extravol002", 404.99f) assertChapter(mangaTitle, "Fairy Tail 404 extravol002", 404.99f) - assertChapter(mangaTitle, "Fairy Tail 404.evol002", 404.5f) } @Test @@ -207,7 +206,6 @@ class ChapterRecognitionTest { assertChapter(mangaTitle, "Fairy Tail 404.omakevol002", 404.98f) assertChapter(mangaTitle, "Fairy Tail 404 omakevol002", 404.98f) - assertChapter(mangaTitle, "Fairy Tail 404.ovol002", 404.15f) } @Test @@ -216,7 +214,6 @@ class ChapterRecognitionTest { assertChapter(mangaTitle, "Fairy Tail 404.specialvol002", 404.97f) assertChapter(mangaTitle, "Fairy Tail 404 specialvol002", 404.97f) - assertChapter(mangaTitle, "Fairy Tail 404.svol002", 404.19f) } @Test @@ -253,6 +250,16 @@ class ChapterRecognitionTest { assertChapter("One Outs", "One Outs 001", 1f) } + @Test + fun `Chapters containing ordinals`() { + val mangaTitle = "The Sister of the Woods with a Thousand Young" + + assertChapter(mangaTitle, "The 1st Night", 1f) + assertChapter(mangaTitle, "The 2nd Night", 2f) + assertChapter(mangaTitle, "The 3rd Night", 3f) + assertChapter(mangaTitle, "The 4th Night", 4f) + } + private fun assertChapter(mangaTitle: String, name: String, expected: Float) { val chapterNumber = ChapterRecognition.parseChapterNumber(mangaTitle, name) assertEquals(chapterNumber, expected)