mirror of
https://github.com/mihonapp/mihon.git
synced 2024-11-07 20:31:02 -05:00
Initial chapter number recognition (needs improvement). Remove an old class.
This commit is contained in:
parent
bc83bd7033
commit
692d3c1b2c
5 changed files with 198 additions and 54 deletions
|
@ -32,6 +32,9 @@ public class Chapter {
|
|||
@StorIOSQLiteColumn(name = ChaptersTable.COLUMN_DATE_UPLOAD)
|
||||
public long date_upload;
|
||||
|
||||
@StorIOSQLiteColumn(name = ChaptersTable.COLUMN_CHAPTER_NUMBER)
|
||||
public float chapter_number;
|
||||
|
||||
public int downloaded;
|
||||
|
||||
public static final int UNKNOWN = 0;
|
||||
|
@ -58,7 +61,7 @@ public class Chapter {
|
|||
}
|
||||
|
||||
public static Chapter newChapter() {
|
||||
Chapter c = new Chapter();
|
||||
return c;
|
||||
return new Chapter();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -31,6 +31,9 @@ public class ChaptersTable {
|
|||
@NonNull
|
||||
public static final String COLUMN_LAST_PAGE_READ = "last_page_read";
|
||||
|
||||
@NonNull
|
||||
public static final String COLUMN_CHAPTER_NUMBER = "chapter_number";
|
||||
|
||||
@NonNull
|
||||
public static String getCreateTableQuery() {
|
||||
return "CREATE TABLE " + TABLE + "("
|
||||
|
@ -40,6 +43,7 @@ public class ChaptersTable {
|
|||
+ COLUMN_NAME + " TEXT NOT NULL, "
|
||||
+ COLUMN_READ + " BOOLEAN NOT NULL, "
|
||||
+ COLUMN_LAST_PAGE_READ + " INT NOT NULL, "
|
||||
+ COLUMN_CHAPTER_NUMBER + " FLOAT NOT NULL, "
|
||||
+ COLUMN_DATE_FETCH + " LONG NOT NULL, "
|
||||
+ COLUMN_DATE_UPLOAD + " LONG NOT NULL, "
|
||||
+ "FOREIGN KEY(" + COLUMN_MANGA_ID + ") REFERENCES " + MangasTable.TABLE + "(" + MangasTable.COLUMN_ID + ") "
|
||||
|
|
|
@ -0,0 +1,102 @@
|
|||
package eu.kanade.mangafeed.util;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import eu.kanade.mangafeed.data.models.Chapter;
|
||||
import eu.kanade.mangafeed.data.models.Manga;
|
||||
|
||||
public class ChapterRecognition {
|
||||
|
||||
private static Pattern p1 = Pattern.compile("ch.?(\\d+[\\.,]?\\d*)");
|
||||
private static Pattern p2 = Pattern.compile("(\\d+[\\.,]?\\d*)");
|
||||
|
||||
public static void parseChapterNumber(Chapter chapter, Manga manga) {
|
||||
if (chapter.chapter_number != 0)
|
||||
return;
|
||||
|
||||
// Remove spaces and convert to lower case
|
||||
String name = replaceIrrelevantCharacters(chapter.name);
|
||||
Matcher matcher;
|
||||
|
||||
// Safest option, the chapter has a token prepended
|
||||
matcher = p1.matcher(name);
|
||||
if (matcher.find()) {
|
||||
chapter.chapter_number = Float.parseFloat(matcher.group(1));
|
||||
return;
|
||||
}
|
||||
|
||||
// If there's only one number, use it
|
||||
matcher = p2.matcher(name);
|
||||
List<Float> occurences = getAllOccurrences(matcher);
|
||||
if (occurences.size() == 1) {
|
||||
chapter.chapter_number = occurences.get(0);
|
||||
return;
|
||||
}
|
||||
|
||||
// Try to remove the manga name from the chapter, and try again
|
||||
String mangaName = replaceIrrelevantCharacters(manga.title);
|
||||
String nameWithoutManga = difference(mangaName, name);
|
||||
if (!nameWithoutManga.isEmpty()) {
|
||||
matcher = p2.matcher(nameWithoutManga);
|
||||
occurences = getAllOccurrences(matcher);
|
||||
if (occurences.size() == 1) {
|
||||
chapter.chapter_number = occurences.get(0);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO more checks (maybe levenshtein?)
|
||||
|
||||
}
|
||||
|
||||
public static List<Float> getAllOccurrences(Matcher matcher) {
|
||||
List<Float> occurences = new ArrayList<>();
|
||||
while (matcher.find()) {
|
||||
try {
|
||||
float value = Float.parseFloat(matcher.group());
|
||||
if (!occurences.contains(value))
|
||||
occurences.add(value);
|
||||
} catch (NumberFormatException e) { /* Do nothing */ }
|
||||
}
|
||||
return occurences;
|
||||
}
|
||||
|
||||
public static String replaceIrrelevantCharacters(String str) {
|
||||
return str.replaceAll("\\s+", "").toLowerCase();
|
||||
}
|
||||
|
||||
public static String difference(String str1, String str2) {
|
||||
if (str1 == null) {
|
||||
return str2;
|
||||
}
|
||||
if (str2 == null) {
|
||||
return str1;
|
||||
}
|
||||
int at = indexOfDifference(str1, str2);
|
||||
if (at == -1) {
|
||||
return "";
|
||||
}
|
||||
return str2.substring(at);
|
||||
}
|
||||
public static int indexOfDifference(String str1, String str2) {
|
||||
if (str1 == str2) {
|
||||
return -1;
|
||||
}
|
||||
if (str1 == null || str2 == null) {
|
||||
return 0;
|
||||
}
|
||||
int i;
|
||||
for (i = 0; i < str1.length() && i < str2.length(); ++i) {
|
||||
if (str1.charAt(i) != str2.charAt(i)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i < str2.length() || i < str1.length()) {
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
}
|
|
@ -1,52 +0,0 @@
|
|||
package eu.kanade.mangafeed.util;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import eu.kanade.mangafeed.data.models.Chapter;
|
||||
import eu.kanade.mangafeed.data.models.Manga;
|
||||
|
||||
/**
|
||||
* Created by len on 8/10/15.
|
||||
*/
|
||||
public class DummyDataUtil {
|
||||
|
||||
public static List<Manga> createDummyManga() {
|
||||
ArrayList<Manga> mangas = new ArrayList<>();
|
||||
mangas.add(createDummyManga("One Piece"));
|
||||
mangas.add(createDummyManga("Berserk"));
|
||||
mangas.add(createDummyManga("Horimiya"));
|
||||
mangas.add(createDummyManga("Übel Blatt"));
|
||||
|
||||
return mangas;
|
||||
}
|
||||
|
||||
private static Manga createDummyManga(String title) {
|
||||
Manga m = new Manga();
|
||||
m.title = title;
|
||||
m.url="http://example.com";
|
||||
m.artist="Eiichiro Oda";
|
||||
m.author="Eiichiro Oda";
|
||||
m.description="...";
|
||||
m.genre="Action, Drama";
|
||||
m.status="Ongoing";
|
||||
m.thumbnail_url="http://example.com/pic.png";
|
||||
return m;
|
||||
}
|
||||
|
||||
public static List<Chapter> createDummyChapters() {
|
||||
List<Chapter> chapters = new ArrayList<>();
|
||||
Chapter c;
|
||||
|
||||
for (int i = 1; i < 50; i++) {
|
||||
c = new Chapter();
|
||||
c.manga_id = 1L;
|
||||
c.name = "Chapter " + i;
|
||||
c.url = "http://example.com/1";
|
||||
chapters.add(c);
|
||||
}
|
||||
|
||||
return chapters;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
package eu.kanade.mangafeed;
|
||||
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import eu.kanade.mangafeed.data.models.Chapter;
|
||||
import eu.kanade.mangafeed.data.models.Manga;
|
||||
import eu.kanade.mangafeed.util.ChapterRecognition;
|
||||
|
||||
import static org.hamcrest.Matchers.is;
|
||||
import static org.junit.Assert.assertThat;
|
||||
|
||||
public class ChapterRecognitionTest {
|
||||
|
||||
Manga randomManga;
|
||||
|
||||
private Chapter createChapter(String title) {
|
||||
Chapter chapter = new Chapter();
|
||||
chapter.name = title;
|
||||
return chapter;
|
||||
}
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
randomManga = new Manga();
|
||||
randomManga.title = "Something";
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithOneDigit() {
|
||||
Chapter c = createChapter("Ch.3: Self-proclaimed Genius");
|
||||
ChapterRecognition.parseChapterNumber(c, randomManga);
|
||||
assertThat(c.chapter_number, is(3f));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithVolumeBefore() {
|
||||
Chapter c = createChapter("Vol.1 Ch.4: Misrepresentation");
|
||||
ChapterRecognition.parseChapterNumber(c, randomManga);
|
||||
assertThat(c.chapter_number, is(4f));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithVolumeAndVersionNumber() {
|
||||
Chapter c = createChapter("Vol.1 Ch.3 (v2) Read Online");
|
||||
ChapterRecognition.parseChapterNumber(c, randomManga);
|
||||
assertThat(c.chapter_number, is(3f));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithVolumeAndNumberInTitle() {
|
||||
Chapter c = createChapter("Vol.15 Ch.90: Here Blooms the Daylily, Part 4");
|
||||
ChapterRecognition.parseChapterNumber(c, randomManga);
|
||||
assertThat(c.chapter_number, is(90f));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithVolumeAndSpecialChapter() {
|
||||
Chapter c = createChapter("Vol.10 Ch.42.5: Homecoming (Beginning)");
|
||||
ChapterRecognition.parseChapterNumber(c, randomManga);
|
||||
assertThat(c.chapter_number, is(42.5f));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithJustANumber() {
|
||||
Chapter c = createChapter("Homecoming (Beginning) 42");
|
||||
ChapterRecognition.parseChapterNumber(c, randomManga);
|
||||
assertThat(c.chapter_number, is(42f));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithJustASpecialChapter() {
|
||||
Chapter c = createChapter("Homecoming (Beginning) 42.5");
|
||||
ChapterRecognition.parseChapterNumber(c, randomManga);
|
||||
assertThat(c.chapter_number, is(42.5f));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithNumberinMangaTitle() {
|
||||
Chapter c = createChapter("3x3 Eyes 96");
|
||||
Manga m = new Manga();
|
||||
m.title = "3x3 Eyes";
|
||||
ChapterRecognition.parseChapterNumber(c, m);
|
||||
assertThat(c.chapter_number, is(96f));
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in a new issue