This commit is contained in:
Horis 2023-12-28 23:24:48 +08:00
parent 830d6da5c7
commit 2f29c117a2
4 changed files with 91 additions and 18 deletions

View File

@ -5,18 +5,32 @@ import androidx.annotation.Keep
import com.script.SimpleBindings
import com.script.rhino.RhinoScriptEngine
import io.legado.app.constant.AppPattern.JS_PATTERN
import io.legado.app.data.entities.*
import io.legado.app.data.entities.BaseBook
import io.legado.app.data.entities.BaseSource
import io.legado.app.data.entities.Book
import io.legado.app.data.entities.BookChapter
import io.legado.app.data.entities.BookSource
import io.legado.app.help.CacheManager
import io.legado.app.help.JsExtensions
import io.legado.app.help.http.CookieStore
import io.legado.app.model.webBook.WebBook
import io.legado.app.utils.*
import io.legado.app.utils.GSON
import io.legado.app.utils.NetworkUtils
import io.legado.app.utils.fromJsonObject
import io.legado.app.utils.isJson
import io.legado.app.utils.printOnDebug
import io.legado.app.utils.splitNotBlank
import io.legado.app.utils.stackTraceStr
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.withTimeout
import org.apache.commons.text.StringEscapeUtils
import org.jsoup.nodes.Node
import org.mozilla.javascript.NativeObject
import java.net.URL
import java.util.regex.Pattern
import kotlin.collections.component1
import kotlin.collections.component2
import kotlin.collections.set
/**
* 解析规则获取结果
@ -55,7 +69,10 @@ class AnalyzeRule(
fun setContent(content: Any?, baseUrl: String? = null): AnalyzeRule {
if (content == null) throw AssertionError("内容不可空Content cannot be null")
this.content = content
isJSON = content.toString().isJson()
isJSON = when (content) {
is Node -> false
else -> content.toString().isJson()
}
setBaseUrl(baseUrl)
objectChangedXP = true
objectChangedJS = true

View File

@ -0,0 +1,45 @@
package io.legado.app.utils
/**
 * Searches this byte array for the first occurrence of [pattern] using
 * Knuth-Morris-Pratt matching (the failure table comes from [computeFailure]).
 *
 * Only bytes at indices from [start] (inclusive) up to [stop] (exclusive) are
 * examined, so a reported match always lies entirely inside that window.
 *
 * An empty [pattern] is treated as matching at [start], provided [start] is a
 * valid position inside both the window and the array; otherwise -1 is returned.
 *
 * @param pattern the byte sequence to look for
 * @param start index of the first byte to examine (inclusive)
 * @param stop index at which scanning ends (exclusive); defaults to the array size
 * @return the index of the first byte of the first match, or -1 when there is none
 */
fun ByteArray.indexOf(pattern: ByteArray, start: Int = 0, stop: Int = size): Int {
    // Without this guard an empty pattern would be indexed at position 0 below
    // and throw ArrayIndexOutOfBoundsException as soon as the window is non-empty.
    if (pattern.isEmpty()) {
        return if (start in 0..minOf(stop, size)) start else -1
    }
    val data = this
    val failure: IntArray = computeFailure(pattern)
    // j = number of leading pattern bytes matched against the bytes ending at data[i].
    var j = 0
    for (i in start until stop) {
        // On a mismatch fall back to the longest shorter prefix that can still match.
        while (j > 0 && pattern[j] != data[i]) {
            j = failure[j - 1]
        }
        if (pattern[j] == data[i]) {
            j++
        }
        if (j == pattern.size) {
            // data[i] is the last byte of the match; step back to its first byte.
            return i - pattern.size + 1
        }
    }
    return -1
}
/**
 * Builds the failure table for [pattern] by matching the pattern against itself.
 *
 * Entry `k` of the result holds the length of the longest proper prefix of
 * `pattern[0..k]` that is also a suffix of it. Entry 0 is always 0, and an
 * empty pattern yields an empty table.
 */
private fun computeFailure(pattern: ByteArray): IntArray {
    val table = IntArray(pattern.size)
    // Length of the prefix currently matched against the suffix ending before `pos`.
    var matched = 0
    var pos = 1
    while (pos < pattern.size) {
        val current = pattern[pos]
        // Shrink the candidate prefix until it can be extended by `current` (or is empty).
        while (matched > 0 && pattern[matched] != current) {
            matched = table[matched - 1]
        }
        if (pattern[matched] == current) {
            matched += 1
        }
        table[pos] = matched
        pos += 1
    }
    return table
}

View File

@ -5,8 +5,6 @@ import io.legado.app.lib.icu4j.CharsetDetector
import org.jsoup.Jsoup
import java.io.File
import java.io.FileInputStream
import java.nio.charset.StandardCharsets
import java.util.*
/**
* 自动获取文件的编码
@ -14,9 +12,21 @@ import java.util.*
@Suppress("MemberVisibilityCanBePrivate", "unused")
object EncodingDetect {
fun getHtmlEncode(bytes: ByteArray): String? {
private val headTagRegex = "(?i)<head>[\\s\\S]*?</head>".toRegex()
private val headOpenBytes = "<head>".toByteArray()
private val headCloseBytes = "</head>".toByteArray()
fun getHtmlEncode(bytes: ByteArray): String {
try {
val doc = Jsoup.parse(String(bytes, StandardCharsets.UTF_8))
var head: String? = null
val startIndex = bytes.indexOf(headOpenBytes)
if (startIndex > -1) {
val endIndex = bytes.indexOf(headCloseBytes, startIndex)
if (endIndex > -1) {
head = String(bytes.copyOfRange(startIndex, endIndex + headCloseBytes.size))
}
}
val doc = Jsoup.parseBodyFragment(head ?: headTagRegex.find(String(bytes))!!.value)
val metaTags = doc.getElementsByTag("meta")
var charsetStr: String
for (metaTag in metaTags) {
@ -24,16 +34,14 @@ object EncodingDetect {
if (!TextUtils.isEmpty(charsetStr)) {
return charsetStr
}
val content = metaTag.attr("content")
val httpEquiv = metaTag.attr("http-equiv")
if (httpEquiv.lowercase(Locale.getDefault()) == "content-type") {
charsetStr = if (content.lowercase(Locale.getDefault()).contains("charset")) {
content.substring(
content.lowercase(Locale.getDefault())
.indexOf("charset") + "charset=".length
)
if (httpEquiv.equals("content-type", true)) {
val content = metaTag.attr("content")
val idx = content.indexOf("charset=", ignoreCase = true)
charsetStr = if (idx > -1) {
content.substring(idx + "charset=".length)
} else {
content.substring(content.lowercase(Locale.getDefault()).indexOf(";") + 1)
content.substringAfter(";")
}
if (!TextUtils.isEmpty(charsetStr)) {
return charsetStr

View File

@ -17,6 +17,9 @@ object HtmlFormatter {
"<img[^>]*\\ssrc\\s*=\\s*\"([^\"{>]*\\{(?:[^{}]|\\{[^}>]+\\})+\\})\"[^>]*>|<img[^>]*\\sdata-[^=>]*=\\s*\"([^\">]*)\"[^>]*>|<img[^>]*\\ssrc\\s*=\\s*\"([^\">]*)\"[^>]*>",
Pattern.CASE_INSENSITIVE
)
private val indent1Regex = "\\s*\\n+\\s*".toRegex()
private val indent2Regex = "^[\\n\\s]+".toRegex()
private val lastRegex = "[\\n\\s]+$".toRegex()
fun format(html: String?, otherRegex: Regex = otherHtmlRegex): String {
html ?: return ""
@ -26,9 +29,9 @@ object HtmlFormatter {
.replace(wrapHtmlRegex, "\n")
.replace(commentRegex, "")
.replace(otherRegex, "")
.replace("\\s*\\n+\\s*".toRegex(), "\n  ")
.replace("^[\\n\\s]+".toRegex(), "  ")
.replace("[\\n\\s]+$".toRegex(), "")
.replace(indent1Regex, "\n  ")
.replace(indent2Regex, "  ")
.replace(lastRegex, "")
}
fun formatKeepImg(html: String?, redirectUrl: URL? = null): String {