mirror of
https://github.com/gedoor/legado.git
synced 2024-07-06 23:47:49 +08:00
优化
This commit is contained in:
parent
830d6da5c7
commit
2f29c117a2
@ -5,18 +5,32 @@ import androidx.annotation.Keep
|
||||
import com.script.SimpleBindings
|
||||
import com.script.rhino.RhinoScriptEngine
|
||||
import io.legado.app.constant.AppPattern.JS_PATTERN
|
||||
import io.legado.app.data.entities.*
|
||||
import io.legado.app.data.entities.BaseBook
|
||||
import io.legado.app.data.entities.BaseSource
|
||||
import io.legado.app.data.entities.Book
|
||||
import io.legado.app.data.entities.BookChapter
|
||||
import io.legado.app.data.entities.BookSource
|
||||
import io.legado.app.help.CacheManager
|
||||
import io.legado.app.help.JsExtensions
|
||||
import io.legado.app.help.http.CookieStore
|
||||
import io.legado.app.model.webBook.WebBook
|
||||
import io.legado.app.utils.*
|
||||
import io.legado.app.utils.GSON
|
||||
import io.legado.app.utils.NetworkUtils
|
||||
import io.legado.app.utils.fromJsonObject
|
||||
import io.legado.app.utils.isJson
|
||||
import io.legado.app.utils.printOnDebug
|
||||
import io.legado.app.utils.splitNotBlank
|
||||
import io.legado.app.utils.stackTraceStr
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import kotlinx.coroutines.withTimeout
|
||||
import org.apache.commons.text.StringEscapeUtils
|
||||
import org.jsoup.nodes.Node
|
||||
import org.mozilla.javascript.NativeObject
|
||||
import java.net.URL
|
||||
import java.util.regex.Pattern
|
||||
import kotlin.collections.component1
|
||||
import kotlin.collections.component2
|
||||
import kotlin.collections.set
|
||||
|
||||
/**
|
||||
* 解析规则获取结果
|
||||
@ -55,7 +69,10 @@ class AnalyzeRule(
|
||||
fun setContent(content: Any?, baseUrl: String? = null): AnalyzeRule {
|
||||
if (content == null) throw AssertionError("内容不可空(Content cannot be null)")
|
||||
this.content = content
|
||||
isJSON = content.toString().isJson()
|
||||
isJSON = when (content) {
|
||||
is Node -> false
|
||||
else -> content.toString().isJson()
|
||||
}
|
||||
setBaseUrl(baseUrl)
|
||||
objectChangedXP = true
|
||||
objectChangedJS = true
|
||||
|
45
app/src/main/java/io/legado/app/utils/ByteArrayExtensions.kt
Normal file
45
app/src/main/java/io/legado/app/utils/ByteArrayExtensions.kt
Normal file
@ -0,0 +1,45 @@
|
||||
package io.legado.app.utils
|
||||
|
||||
|
||||
/**
 * Searches this byte array for the first occurrence of [pattern] using the
 * Knuth-Morris-Pratt algorithm.
 *
 * Only the bytes in the half-open range `[start, stop)` are examined. Bounds that
 * fall outside the array are clamped to `0..size` instead of throwing.
 *
 * @param pattern the byte sequence to look for
 * @param start index of the first byte to examine (inclusive)
 * @param stop index just past the last byte to examine (exclusive)
 * @return the index of the first byte of the first match, or `-1` if there is none.
 * An empty [pattern] matches at the clamped start position, mirroring `String.indexOf("")`.
 */
fun ByteArray.indexOf(pattern: ByteArray, start: Int = 0, stop: Int = size): Int {
    // Clamp the search window so out-of-range arguments cannot index past the array.
    val from = start.coerceAtLeast(0)
    val to = stop.coerceAtMost(size)
    if (from > to) return -1
    // An empty pattern would otherwise make pattern[0] throw inside the loop.
    if (pattern.isEmpty()) return from
    // A pattern longer than the window can never match; skip building the table.
    if (pattern.size > to - from) return -1

    val failure = computeFailure(pattern)
    // Number of pattern bytes currently matched against the data.
    var matched = 0
    for (i in from until to) {
        val current = this[i]
        // On mismatch, fall back to the longest prefix that is still a valid match.
        while (matched > 0 && pattern[matched] != current) {
            matched = failure[matched - 1]
        }
        if (pattern[matched] == current) {
            matched++
        }
        if (matched == pattern.size) {
            return i - pattern.size + 1
        }
    }
    return -1
}

/**
 * Builds the failure table for [pattern] by matching the pattern against itself.
 *
 * Entry `i` holds the length of the longest proper prefix of `pattern[0..i]`
 * that is also a suffix of it. An empty pattern yields an empty table.
 */
private fun computeFailure(pattern: ByteArray): IntArray {
    val failure = IntArray(pattern.size)
    var prefixLen = 0
    for (i in 1 until pattern.size) {
        while (prefixLen > 0 && pattern[prefixLen] != pattern[i]) {
            prefixLen = failure[prefixLen - 1]
        }
        if (pattern[prefixLen] == pattern[i]) {
            prefixLen++
        }
        failure[i] = prefixLen
    }
    return failure
}
|
@ -5,8 +5,6 @@ import io.legado.app.lib.icu4j.CharsetDetector
|
||||
import org.jsoup.Jsoup
|
||||
import java.io.File
|
||||
import java.io.FileInputStream
|
||||
import java.nio.charset.StandardCharsets
|
||||
import java.util.*
|
||||
|
||||
/**
|
||||
* 自动获取文件的编码
|
||||
@ -14,9 +12,21 @@ import java.util.*
|
||||
@Suppress("MemberVisibilityCanBePrivate", "unused")
|
||||
object EncodingDetect {
|
||||
|
||||
fun getHtmlEncode(bytes: ByteArray): String? {
|
||||
private val headTagRegex = "(?i)<head>[\\s\\S]*?</head>".toRegex()
|
||||
private val headOpenBytes = "<head>".toByteArray()
|
||||
private val headCloseBytes = "</head>".toByteArray()
|
||||
|
||||
fun getHtmlEncode(bytes: ByteArray): String {
|
||||
try {
|
||||
val doc = Jsoup.parse(String(bytes, StandardCharsets.UTF_8))
|
||||
var head: String? = null
|
||||
val startIndex = bytes.indexOf(headOpenBytes)
|
||||
if (startIndex > -1) {
|
||||
val endIndex = bytes.indexOf(headCloseBytes, startIndex)
|
||||
if (endIndex > -1) {
|
||||
head = String(bytes.copyOfRange(startIndex, endIndex + headCloseBytes.size))
|
||||
}
|
||||
}
|
||||
val doc = Jsoup.parseBodyFragment(head ?: headTagRegex.find(String(bytes))!!.value)
|
||||
val metaTags = doc.getElementsByTag("meta")
|
||||
var charsetStr: String
|
||||
for (metaTag in metaTags) {
|
||||
@ -24,16 +34,14 @@ object EncodingDetect {
|
||||
if (!TextUtils.isEmpty(charsetStr)) {
|
||||
return charsetStr
|
||||
}
|
||||
val content = metaTag.attr("content")
|
||||
val httpEquiv = metaTag.attr("http-equiv")
|
||||
if (httpEquiv.lowercase(Locale.getDefault()) == "content-type") {
|
||||
charsetStr = if (content.lowercase(Locale.getDefault()).contains("charset")) {
|
||||
content.substring(
|
||||
content.lowercase(Locale.getDefault())
|
||||
.indexOf("charset") + "charset=".length
|
||||
)
|
||||
if (httpEquiv.equals("content-type", true)) {
|
||||
val content = metaTag.attr("content")
|
||||
val idx = content.indexOf("charset=", ignoreCase = true)
|
||||
charsetStr = if (idx > -1) {
|
||||
content.substring(idx + "charset=".length)
|
||||
} else {
|
||||
content.substring(content.lowercase(Locale.getDefault()).indexOf(";") + 1)
|
||||
content.substringAfter(";")
|
||||
}
|
||||
if (!TextUtils.isEmpty(charsetStr)) {
|
||||
return charsetStr
|
||||
|
@ -17,6 +17,9 @@ object HtmlFormatter {
|
||||
"<img[^>]*\\ssrc\\s*=\\s*\"([^\"{>]*\\{(?:[^{}]|\\{[^}>]+\\})+\\})\"[^>]*>|<img[^>]*\\sdata-[^=>]*=\\s*\"([^\">]*)\"[^>]*>|<img[^>]*\\ssrc\\s*=\\s*\"([^\">]*)\"[^>]*>",
|
||||
Pattern.CASE_INSENSITIVE
|
||||
)
|
||||
private val indent1Regex = "\\s*\\n+\\s*".toRegex()
|
||||
private val indent2Regex = "^[\\n\\s]+".toRegex()
|
||||
private val lastRegex = "[\\n\\s]+$".toRegex()
|
||||
|
||||
fun format(html: String?, otherRegex: Regex = otherHtmlRegex): String {
|
||||
html ?: return ""
|
||||
@ -26,9 +29,9 @@ object HtmlFormatter {
|
||||
.replace(wrapHtmlRegex, "\n")
|
||||
.replace(commentRegex, "")
|
||||
.replace(otherRegex, "")
|
||||
.replace("\\s*\\n+\\s*".toRegex(), "\n ")
|
||||
.replace("^[\\n\\s]+".toRegex(), " ")
|
||||
.replace("[\\n\\s]+$".toRegex(), "")
|
||||
.replace(indent1Regex, "\n ")
|
||||
.replace(indent2Regex, " ")
|
||||
.replace(lastRegex, "")
|
||||
}
|
||||
|
||||
fun formatKeepImg(html: String?, redirectUrl: URL? = null): String {
|
||||
|
Loading…
Reference in New Issue
Block a user