mirror of
https://github.com/mihonapp/mihon.git
synced 2024-11-07 20:31:02 -05:00
Parser improvements
This commit is contained in:
parent
585f7ec17d
commit
143303f7df
3 changed files with 59 additions and 15 deletions
|
@ -9,8 +9,10 @@ import eu.kanade.tachiyomi.data.source.getLanguages
|
||||||
import eu.kanade.tachiyomi.data.source.model.MangasPage
|
import eu.kanade.tachiyomi.data.source.model.MangasPage
|
||||||
import eu.kanade.tachiyomi.data.source.model.Page
|
import eu.kanade.tachiyomi.data.source.model.Page
|
||||||
import eu.kanade.tachiyomi.util.asJsoup
|
import eu.kanade.tachiyomi.util.asJsoup
|
||||||
|
import eu.kanade.tachiyomi.util.attrOrText
|
||||||
import okhttp3.Request
|
import okhttp3.Request
|
||||||
import okhttp3.Response
|
import okhttp3.Response
|
||||||
|
import org.jsoup.Jsoup
|
||||||
import org.jsoup.nodes.Element
|
import org.jsoup.nodes.Element
|
||||||
import java.text.SimpleDateFormat
|
import java.text.SimpleDateFormat
|
||||||
import java.util.*
|
import java.util.*
|
||||||
|
@ -127,28 +129,59 @@ class YamlOnlineSource(context: Context, mappings: Map<*, *>) : OnlineSource(con
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Fills [pages] with the pages of a chapter, following the rules of the source's `pages` node.
 *
 * Page urls are captured first, from `pages_regex` applied to the raw body or, failing that,
 * from `pages_css` applied to the parsed document. Image urls are then captured from
 * `image_regex` (first capture group) or `image_css`, and assigned to the pages by position.
 * A page is created on demand when an image has no page at its index.
 *
 * @param response the response whose body and request url are parsed.
 * @param pages the list that receives the parsed pages.
 */
override fun pageListParse(response: Response, pages: MutableList<Page>) {
    val body = response.body().string()
    val url = response.request().url().toString()

    // TODO lazy initialization in Kotlin 1.1
    val document = Jsoup.parse(body, url)

    with(map.pages) {
        // Read every getter-backed property once: local vals can be smart cast, so the
        // "null check followed by !!" pairs are no longer needed.
        val pagesRegex = pages_regex
        val pagesCss = pages_css
        val urlReplace = replace
        val urlReplacement = replacement
        val imageRegex = image_regex
        val imageCss = image_css

        // Capture a list of values where page urls will be resolved.
        val capturedPages = when {
            pagesRegex != null -> pagesRegex.toRegex().findAll(body).map { it.value }.toList()
            // NOTE(review): pages_attr is declared outside this view; the non-null assertion
            // is kept exactly as it was.
            pagesCss != null -> document.select(pagesCss).map { it.attrOrText(pages_attr!!) }
            else -> null
        }

        // Compile the replace pattern once instead of once per captured value.
        val replaceRegex = if (urlReplace != null && urlReplacement != null) {
            urlReplace.toRegex()
        } else {
            null
        }

        // For each captured value, obtain the url and create a new page.
        capturedPages?.forEach { value ->
            // If the captured value isn't an url, we have to use replaces with the chapter url.
            val pageUrl = if (replaceRegex != null && urlReplacement != null) {
                url.replace(replaceRegex, urlReplacement.replace("\$value", value))
            } else {
                value
            }

            pages.add(Page(pages.size, pageUrl))
        }

        // Capture a list of images.
        val capturedImages = when {
            imageRegex != null -> imageRegex.toRegex().findAll(body).map { it.groups[1]?.value }.toList()
            imageCss != null -> document.select(imageCss).map { it.absUrl(image_attr) }
            else -> null
        }

        // Assign the image url to each page, creating the page when it doesn't exist yet.
        capturedImages?.forEachIndexed { index, capturedImageUrl ->
            val page = pages.getOrElse(index) { Page(index, "").apply { pages.add(this) } }
            page.imageUrl = capturedImageUrl
        }
    }
}
|
||||||
|
|
||||||
/**
 * Returns the image url found in a page response, following the rules of the source's
 * `pages` node.
 *
 * `image_regex` takes precedence and yields its first capture group from the raw body.
 * Otherwise `image_css` selects an element in the parsed document and the absolute url of
 * its `image_attr` attribute is returned.
 *
 * @param response the response whose body and request url are parsed.
 * @return the url of the image.
 * @throws Exception if neither rule is configured, or the configured rule finds no image.
 */
override fun imageUrlParse(response: Response): String {
    val body = response.body().string()
    val url = response.request().url().toString()

    return with(map.pages) {
        // Local copies allow smart casts on the getter-backed nullable properties.
        val imageRegex = image_regex
        val imageCss = image_css

        when {
            imageRegex != null ->
                // Fail with a readable message instead of a bare null pointer error when
                // the pattern (or its first group) doesn't match.
                imageRegex.toRegex().find(body)?.groups?.get(1)?.value
                        ?: throw Exception("image_regex didn't capture an image url in $url")
            imageCss != null -> {
                val element = Jsoup.parse(body, url).select(imageCss).first()
                        ?: throw Exception("image_css didn't match any element in $url")
                element.absUrl(image_attr)
            }
            else -> throw Exception("image_regex and image_css are null")
        }
    }
}
|
||||||
|
|
||||||
|
|
|
@ -194,6 +194,9 @@ class DateNode(private val map: Map<String, Any?>) : SelectableNode(map) {
|
||||||
|
|
||||||
class PagesNode(private val map: Map<String, Any?>) {
|
class PagesNode(private val map: Map<String, Any?>) {
|
||||||
|
|
||||||
|
val pages_regex: String?
|
||||||
|
get() = map["pages_regex"] as? String
|
||||||
|
|
||||||
val pages_css: String?
|
val pages_css: String?
|
||||||
get() = map["pages_css"] as? String
|
get() = map["pages_css"] as? String
|
||||||
|
|
||||||
|
@ -206,7 +209,11 @@ class PagesNode(private val map: Map<String, Any?>) {
|
||||||
val replacement: String?
|
val replacement: String?
|
||||||
get() = map["url_replacement"] as? String
|
get() = map["url_replacement"] as? String
|
||||||
|
|
||||||
val image_css: String by map
|
val image_regex: String?
|
||||||
|
get() = map["image_regex"] as? String
|
||||||
|
|
||||||
|
val image_css: String?
|
||||||
|
get() = map["image_css"] as? String
|
||||||
|
|
||||||
val image_attr: String
|
val image_attr: String
|
||||||
get() = map["image_attr"] as? String ?: "src"
|
get() = map["image_attr"] as? String ?: "src"
|
||||||
|
|
|
@ -13,6 +13,10 @@ fun Element.selectInt(css: String, defaultValue: Int = 0): Int {
|
||||||
return select(css).first()?.text()?.toInt() ?: defaultValue
|
return select(css).first()?.text()?.toInt() ?: defaultValue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the text of this element when [css] is the literal "text", otherwise the value of
 * the attribute named [css].
 *
 * @param css the attribute name to read, or "text" to read the element's text.
 */
fun Element.attrOrText(css: String): String =
        if (css == "text") text() else attr(css)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a Jsoup document for this response.
|
* Returns a Jsoup document for this response.
|
||||||
* @param html the body of the response. Use only if the body was read before calling this method.
|
* @param html the body of the response. Use only if the body was read before calling this method.
|
||||||
|
|
Loading…
Reference in a new issue