This commit is contained in:
Horis 2023-12-14 22:42:57 +08:00
parent 87c930c2d9
commit e59de710f8
6 changed files with 134 additions and 140 deletions

View File

@ -268,6 +268,10 @@ object Debug {
log(debugSource, "︽目录页解析完成")
log(debugSource, showTime = false)
val toc = chapters.filter { !(it.isVolume && it.url.startsWith(it.title)) }
if (toc.isEmpty()) {
log(debugSource, "≡没有正文章节")
return@onSuccess
}
val nextChapterUrl = toc.getOrNull(1)?.url ?: toc.first().url
contentDebug(scope, bookSource, book, toc.first(), nextChapterUrl)
}

View File

@ -28,8 +28,8 @@ class AnalyzeByJSonPath(json: Any) {
* 解决阅读&&||与jsonPath支持的&&||之间的冲突
* 解决{$.rule}形式规则可能匹配错误的问题：旧规则用正则解析内容含}的json文本时，用规则中的字段去匹配这种内容会匹配错误。现改用平衡嵌套方法解决这个问题。
* */
fun getString(rule: String): String? {
if (rule.isEmpty()) return null
fun getString(rule: String): String {
if (rule.isEmpty()) return ""
var result: String
val ruleAnalyzes = RuleAnalyzer(rule, true) //设置平衡组为代码平衡
val rules = ruleAnalyzes.splitRule("&&", "||")
@ -57,7 +57,7 @@ class AnalyzeByJSonPath(json: Any) {
val textList = arrayListOf<String>()
for (rl in rules) {
val temp = getString(rl)
if (!temp.isNullOrEmpty()) {
if (temp.isNotEmpty()) {
textList.add(temp)
if (ruleAnalyzes.elementsType == "||") {
break
@ -126,7 +126,7 @@ class AnalyzeByJSonPath(json: Any) {
return ctx.read(rule)
}
internal fun getList(rule: String): ArrayList<Any>? {
internal fun getList(rule: String): ArrayList<Any> {
val result = ArrayList<Any>()
if (rule.isEmpty()) return result
val ruleAnalyzes = RuleAnalyzer(rule, true) //设置平衡组为代码平衡
@ -143,7 +143,7 @@ class AnalyzeByJSonPath(json: Any) {
val results = ArrayList<ArrayList<*>>()
for (rl in rules) {
val temp = getList(rl)
if (!temp.isNullOrEmpty()) {
if (temp.isNotEmpty()) {
results.add(temp)
if (temp.isNotEmpty() && ruleAnalyzes.elementsType == "||") {
break

View File

@ -42,8 +42,8 @@ class AnalyzeByJSoup(doc: Any) {
* 合并内容列表,得到内容
*/
internal fun getString(ruleStr: String) =
if (ruleStr.isEmpty()) null
else getStringList(ruleStr).takeIf { it.isNotEmpty() }?.joinToString("\n")
if (ruleStr.isEmpty()) ""
else getStringList(ruleStr).takeIf { it.isNotEmpty() }?.joinToString("\n") ?: ""
/**
* 获取一个字符串

View File

@ -6,11 +6,11 @@ import java.util.regex.Pattern
@Keep
object AnalyzeByRegex {
fun getElement(res: String, regs: Array<String>, index: Int = 0): List<String>? {
fun getElement(res: String, regs: Array<String>, index: Int = 0): List<String> {
var vIndex = index
val resM = Pattern.compile(regs[vIndex]).matcher(res)
if (!resM.find()) {
return null
return emptyList()
}
// 判断索引的规则是最后一个规则
return if (vIndex + 1 == regs.size) {

View File

@ -49,21 +49,21 @@ class AnalyzeByXPath(doc: Any) {
}
}
internal fun getElements(xPath: String): List<JXNode>? {
internal fun getElements(xPath: String): List<JXNode> {
if (xPath.isEmpty()) return null
if (xPath.isEmpty()) return emptyList()
val jxNodes = ArrayList<JXNode>()
val ruleAnalyzes = RuleAnalyzer(xPath)
val rules = ruleAnalyzes.splitRule("&&", "||", "%%")
if (rules.size == 1) {
return getResult(rules[0])
return getResult(rules[0]) ?: emptyList()
} else {
val results = ArrayList<List<JXNode>>()
for (rl in rules) {
val temp = getElements(rl)
if (!temp.isNullOrEmpty()) {
if (temp.isNotEmpty()) {
results.add(temp)
if (temp.isNotEmpty() && ruleAnalyzes.elementsType == "||") {
break
@ -130,19 +130,19 @@ class AnalyzeByXPath(doc: Any) {
return result
}
fun getString(rule: String): String? {
fun getString(rule: String): String {
val ruleAnalyzes = RuleAnalyzer(rule)
val rules = ruleAnalyzes.splitRule("&&", "||")
if (rules.size == 1) {
getResult(rule)?.let {
return TextUtils.join("\n", it)
}
return null
return ""
} else {
val textList = arrayListOf<String>()
for (rl in rules) {
val temp = getString(rl)
if (!temp.isNullOrEmpty()) {
if (temp.isNotEmpty()) {
textList.add(temp)
if (ruleAnalyzes.elementsType == "||") {
break

View File

@ -140,58 +140,58 @@ class AnalyzeRule(
mContent: Any? = null,
isUrl: Boolean = false
): List<String>? {
var result: Any? = null
val content = mContent ?: this.content
if (content != null && ruleList.isNotEmpty()) {
result = content
if (result is NativeObject) {
val sourceRule = ruleList.first()
putRule(sourceRule.putMap)
sourceRule.makeUpRule(result)
result = if (sourceRule.getParamSize() > 1) {
// get {{}}
sourceRule.rule
} else {
// 键值直接访问
result[sourceRule.rule]
}?.let {
if (content == null || ruleList.isEmpty()) {
return null
}
var result: Any = content
if (result is NativeObject) {
val sourceRule = ruleList.first()
putRule(sourceRule.putMap)
sourceRule.makeUpRule(result)
result = if (sourceRule.getParamSize() > 1) {
// get {{}}
sourceRule.rule
} else {
// 键值直接访问
result[sourceRule.rule] ?: emptyList<String>()
}
if (sourceRule.replaceRegex.isNotEmpty() && result is List<*>) {
result = result.map {
replaceRegex(it.toString(), sourceRule)
}
} else {
for (sourceRule in ruleList) {
putRule(sourceRule.putMap)
sourceRule.makeUpRule(result)
result?.let {
if (sourceRule.rule.isNotEmpty()) {
result = when (sourceRule.mode) {
Mode.Js -> evalJS(sourceRule.rule, result)
Mode.Json -> getAnalyzeByJSonPath(it).getStringList(sourceRule.rule)
Mode.XPath -> getAnalyzeByXPath(it).getStringList(sourceRule.rule)
Mode.Default -> getAnalyzeByJSoup(it).getStringList(sourceRule.rule)
else -> sourceRule.rule
}
}
if (sourceRule.replaceRegex.isNotEmpty() && result is List<*>) {
val newList = ArrayList<String>()
for (item in result as List<*>) {
newList.add(replaceRegex(item.toString(), sourceRule))
}
result = newList
} else if (sourceRule.replaceRegex.isNotEmpty()) {
result = replaceRegex(result.toString(), sourceRule)
}
} else if (sourceRule.replaceRegex.isNotEmpty()) {
result = replaceRegex(result.toString(), sourceRule)
}
} else {
for (sourceRule in ruleList) {
putRule(sourceRule.putMap)
sourceRule.makeUpRule(result)
if (sourceRule.rule.isNotEmpty()) {
result = when (sourceRule.mode) {
Mode.Js -> evalJS(sourceRule.rule, result) ?: "null"
Mode.Json -> getAnalyzeByJSonPath(result).getStringList(sourceRule.rule)
Mode.XPath -> getAnalyzeByXPath(result).getStringList(sourceRule.rule)
Mode.Default -> getAnalyzeByJSoup(result).getStringList(sourceRule.rule)
else -> sourceRule.rule
}
}
if (sourceRule.replaceRegex.isNotEmpty() && result is List<*>) {
result = result.map {
replaceRegex(it.toString(), sourceRule)
}
} else if (sourceRule.replaceRegex.isNotEmpty()) {
result = replaceRegex(result.toString(), sourceRule)
}
}
}
if (result == null) return null
if (result is String) {
result = (result as String).split("\n")
result = result.split("\n")
}
if (isUrl) {
val urlList = ArrayList<String>()
if (result is List<*>) {
for (url in result as List<*>) {
for (url in result) {
val absoluteURL = NetworkUtils.getAbsoluteURL(redirectUrl, url.toString())
if (absoluteURL.isNotEmpty() && !urlList.contains(absoluteURL)) {
urlList.add(absoluteURL)
@ -227,50 +227,47 @@ class AnalyzeRule(
isUrl: Boolean = false,
unescape: Boolean = true
): String {
var result: Any? = null
val content = mContent ?: this.content
if (content != null && ruleList.isNotEmpty()) {
result = content
if (result is NativeObject) {
val sourceRule = ruleList.first()
if (content == null || ruleList.isEmpty()) {
return if (isUrl) baseUrl ?: "" else ""
}
var result: Any = content
if (result is NativeObject) {
val sourceRule = ruleList.first()
putRule(sourceRule.putMap)
sourceRule.makeUpRule(result)
result = (if (sourceRule.getParamSize() > 1) {
// get {{}}
sourceRule.rule
} else {
// 键值直接访问
result[sourceRule.rule]?.toString() ?: ""
}).let {
replaceRegex(it, sourceRule)
}
} else {
for (sourceRule in ruleList) {
putRule(sourceRule.putMap)
sourceRule.makeUpRule(result)
result = if (sourceRule.getParamSize() > 1) {
// get {{}}
sourceRule.rule
} else {
// 键值直接访问
result[sourceRule.rule]?.toString()
}?.let {
replaceRegex(it, sourceRule)
}
} else {
for (sourceRule in ruleList) {
putRule(sourceRule.putMap)
sourceRule.makeUpRule(result)
result?.let {
if (sourceRule.rule.isNotBlank() || sourceRule.replaceRegex.isEmpty()) {
result = when (sourceRule.mode) {
Mode.Js -> evalJS(sourceRule.rule, it)
Mode.Json -> getAnalyzeByJSonPath(it).getString(sourceRule.rule)
Mode.XPath -> getAnalyzeByXPath(it).getString(sourceRule.rule)
Mode.Default -> if (isUrl) {
getAnalyzeByJSoup(it).getString0(sourceRule.rule)
} else {
getAnalyzeByJSoup(it).getString(sourceRule.rule)
}
if (sourceRule.rule.isNotBlank() || sourceRule.replaceRegex.isEmpty()) {
result = when (sourceRule.mode) {
Mode.Js -> evalJS(sourceRule.rule, result) ?: ""
Mode.Json -> getAnalyzeByJSonPath(result).getString(sourceRule.rule)
Mode.XPath -> getAnalyzeByXPath(result).getString(sourceRule.rule)
Mode.Default -> if (isUrl) {
getAnalyzeByJSoup(result).getString0(sourceRule.rule)
} else {
getAnalyzeByJSoup(result).getString(sourceRule.rule)
}
else -> sourceRule.rule
}
}
if ((result != null) && sourceRule.replaceRegex.isNotEmpty()) {
result = replaceRegex(result.toString(), sourceRule)
}
else -> sourceRule.rule
}
}
if (sourceRule.replaceRegex.isNotEmpty()) {
result = replaceRegex(result.toString(), sourceRule)
}
}
}
if (result == null) result = ""
val str = if (unescape) {
StringEscapeUtils.unescapeHtml4(result.toString())
} else result.toString()
@ -289,30 +286,28 @@ class AnalyzeRule(
*/
fun getElement(ruleStr: String): Any? {
if (TextUtils.isEmpty(ruleStr)) return null
var result: Any? = null
val content = this.content
val content = this.content ?: return null
val ruleList = splitSourceRule(ruleStr, true)
if (content != null && ruleList.isNotEmpty()) {
result = content
for (sourceRule in ruleList) {
putRule(sourceRule.putMap)
sourceRule.makeUpRule(result)
result?.let {
result = when (sourceRule.mode) {
Mode.Regex -> AnalyzeByRegex.getElement(
result.toString(),
sourceRule.rule.splitNotBlank("&&")
)
if (ruleList.isEmpty()) {
return null
}
var result = content
for (sourceRule in ruleList) {
putRule(sourceRule.putMap)
sourceRule.makeUpRule(result)
result = when (sourceRule.mode) {
Mode.Regex -> AnalyzeByRegex.getElement(
result.toString(),
sourceRule.rule.splitNotBlank("&&")
)
Mode.Js -> evalJS(sourceRule.rule, it)
Mode.Json -> getAnalyzeByJSonPath(it).getObject(sourceRule.rule)
Mode.XPath -> getAnalyzeByXPath(it).getElements(sourceRule.rule)
else -> getAnalyzeByJSoup(it).getElements(sourceRule.rule)
}
if (sourceRule.replaceRegex.isNotEmpty()) {
result = replaceRegex(result.toString(), sourceRule)
}
}
Mode.Js -> evalJS(sourceRule.rule, result) ?: Any()
Mode.Json -> getAnalyzeByJSonPath(result).getObject(sourceRule.rule)
Mode.XPath -> getAnalyzeByXPath(result).getElements(sourceRule.rule)
else -> getAnalyzeByJSoup(result).getElements(sourceRule.rule)
}
if (sourceRule.replaceRegex.isNotEmpty()) {
result = replaceRegex(result.toString(), sourceRule)
}
}
return result
@ -323,35 +318,30 @@ class AnalyzeRule(
*/
@Suppress("UNCHECKED_CAST")
fun getElements(ruleStr: String): List<Any> {
var result: Any? = null
val content = this.content
val content = this.content ?: return emptyList()
val ruleList = splitSourceRule(ruleStr, true)
if (content != null && ruleList.isNotEmpty()) {
result = content
for (sourceRule in ruleList) {
putRule(sourceRule.putMap)
result?.let {
result = when (sourceRule.mode) {
Mode.Regex -> AnalyzeByRegex.getElements(
result.toString(),
sourceRule.rule.splitNotBlank("&&")
)
if (ruleList.isEmpty()) {
return emptyList()
}
var result = content
for (sourceRule in ruleList) {
putRule(sourceRule.putMap)
result = when (sourceRule.mode) {
Mode.Regex -> AnalyzeByRegex.getElements(
result.toString(),
sourceRule.rule.splitNotBlank("&&")
)
Mode.Js -> evalJS(sourceRule.rule, result)
Mode.Json -> getAnalyzeByJSonPath(it).getList(sourceRule.rule)
Mode.XPath -> getAnalyzeByXPath(it).getElements(sourceRule.rule)
else -> getAnalyzeByJSoup(it).getElements(sourceRule.rule)
}
if (sourceRule.replaceRegex.isNotEmpty()) {
result = replaceRegex(result.toString(), sourceRule)
}
}
Mode.Js -> evalJS(sourceRule.rule, result) ?: emptyList<Any>()
Mode.Json -> getAnalyzeByJSonPath(result).getList(sourceRule.rule)
Mode.XPath -> getAnalyzeByXPath(result).getElements(sourceRule.rule)
else -> getAnalyzeByJSoup(result).getElements(sourceRule.rule)
}
if (sourceRule.replaceRegex.isNotEmpty()) {
result = replaceRegex(result.toString(), sourceRule)
}
}
result?.let {
return it as List<Any>
}
return ArrayList()
return result as List<Any>
}
/**
@ -413,7 +403,7 @@ class AnalyzeRule(
/**
* getString 类规则缓存
*/
fun splitSourceRuleCacheString(ruleStr: String?) : List<SourceRule> {
fun splitSourceRuleCacheString(ruleStr: String?): List<SourceRule> {
if (ruleStr.isNullOrEmpty()) return emptyList()
val cacheRule = stringRuleCache[ruleStr]
return if (cacheRule != null) {