Improve chapter recognition

This commit is contained in:
inorichi 2015-11-30 20:33:27 +01:00
parent 01a8b13975
commit b2f44ff76a
2 changed files with 44 additions and 6 deletions

View file

@ -12,6 +12,7 @@ public class ChapterRecognition {
private static Pattern p1 = Pattern.compile("ch.?(\\d+[\\.,]?\\d*)"); private static Pattern p1 = Pattern.compile("ch.?(\\d+[\\.,]?\\d*)");
private static Pattern p2 = Pattern.compile("(\\d+[\\.,]?\\d*)"); private static Pattern p2 = Pattern.compile("(\\d+[\\.,]?\\d*)");
private static Pattern p3 = Pattern.compile("(\\d+[\\.,]?\\d*:)");
public static void parseChapterNumber(Chapter chapter, Manga manga) { public static void parseChapterNumber(Chapter chapter, Manga manga) {
if (chapter.chapter_number != -1) if (chapter.chapter_number != -1)
@ -28,9 +29,19 @@ public class ChapterRecognition {
return; return;
} }
List<Float> occurences;
// If there's only one number, use it // If there's only one number, use it
matcher = p2.matcher(name); matcher = p2.matcher(name);
List<Float> occurences = getAllOccurrences(matcher); occurences = getAllOccurrences(matcher);
if (occurences.size() == 1) {
chapter.chapter_number = occurences.get(0);
return;
}
// If it has a colon, the chapter number should be that one
matcher = p3.matcher(name);
occurences = getAllOccurrences(matcher);
if (occurences.size() == 1) { if (occurences.size() == 1) {
chapter.chapter_number = occurences.get(0); chapter.chapter_number = occurences.get(0);
return; return;
@ -55,12 +66,18 @@ public class ChapterRecognition {
public static List<Float> getAllOccurrences(Matcher matcher) { public static List<Float> getAllOccurrences(Matcher matcher) {
List<Float> occurences = new ArrayList<>(); List<Float> occurences = new ArrayList<>();
while (matcher.find()) { while (matcher.find()) {
// Match again to get only numbers from the captured text
String text = matcher.group();
Matcher m = p2.matcher(text);
if (m.find()) {
try { try {
float value = Float.parseFloat(matcher.group()); Float value = Float.parseFloat(m.group());
if (!occurences.contains(value)) if (!occurences.contains(value)) {
occurences.add(value); occurences.add(value);
}
} catch (NumberFormatException e) { /* Do nothing */ } } catch (NumberFormatException e) { /* Do nothing */ }
} }
}
return occurences; return occurences;
} }

View file

@ -84,4 +84,25 @@ public class ChapterRecognitionTest {
assertThat(c.chapter_number, is(96f)); assertThat(c.chapter_number, is(96f));
} }
@Test
public void testWithColonAtTheEnd() {
    // The number followed by a colon should be chosen over the later "365".
    Chapter chapter = createChapter("Chapter 5: 365 days");

    ChapterRecognition.parseChapterNumber(chapter, randomManga);

    assertThat(chapter.chapter_number, is(5f));
}
@Test
public void testWithZeros() {
    // Zero-padded volume and chapter numbers: the chapter (003) is expected, not the volume.
    Chapter chapter = createChapter("Vol.001 Ch.003: Kaguya Doesn't Know Much");

    ChapterRecognition.parseChapterNumber(chapter, randomManga);

    assertThat(chapter.chapter_number, is(3f));
}
@Test
public void testRange() {
    // For a chapter range, the first number of the range is expected.
    Chapter chapter = createChapter("Ch.191-200 Read Online");

    ChapterRecognition.parseChapterNumber(chapter, randomManga);

    assertThat(chapter.chapter_number, is(191f));
}
} }