This commit is contained in:
Horis 2023-12-15 15:07:13 +08:00
parent e59de710f8
commit 89caa35e47
5 changed files with 140 additions and 130 deletions

View File

@ -28,8 +28,8 @@ class AnalyzeByJSonPath(json: Any) {
* 解决阅读&&||与jsonPath支持的&&||之间的冲突 * 解决阅读&&||与jsonPath支持的&&||之间的冲突
* 解决{$.rule}形式规则可能匹配错误的问题旧规则用正则解析内容含}的json文本时用规则中的字段去匹配这种内容会匹配错误.现改用平衡嵌套方法解决这个问题 * 解决{$.rule}形式规则可能匹配错误的问题旧规则用正则解析内容含}的json文本时用规则中的字段去匹配这种内容会匹配错误.现改用平衡嵌套方法解决这个问题
* */ * */
fun getString(rule: String): String { fun getString(rule: String): String? {
if (rule.isEmpty()) return "" if (rule.isEmpty()) return null
var result: String var result: String
val ruleAnalyzes = RuleAnalyzer(rule, true) //设置平衡组为代码平衡 val ruleAnalyzes = RuleAnalyzer(rule, true) //设置平衡组为代码平衡
val rules = ruleAnalyzes.splitRule("&&", "||") val rules = ruleAnalyzes.splitRule("&&", "||")
@ -57,7 +57,7 @@ class AnalyzeByJSonPath(json: Any) {
val textList = arrayListOf<String>() val textList = arrayListOf<String>()
for (rl in rules) { for (rl in rules) {
val temp = getString(rl) val temp = getString(rl)
if (temp.isNotEmpty()) { if (!temp.isNullOrEmpty()) {
textList.add(temp) textList.add(temp)
if (ruleAnalyzes.elementsType == "||") { if (ruleAnalyzes.elementsType == "||") {
break break
@ -126,7 +126,7 @@ class AnalyzeByJSonPath(json: Any) {
return ctx.read(rule) return ctx.read(rule)
} }
internal fun getList(rule: String): ArrayList<Any> { internal fun getList(rule: String): ArrayList<Any>? {
val result = ArrayList<Any>() val result = ArrayList<Any>()
if (rule.isEmpty()) return result if (rule.isEmpty()) return result
val ruleAnalyzes = RuleAnalyzer(rule, true) //设置平衡组为代码平衡 val ruleAnalyzes = RuleAnalyzer(rule, true) //设置平衡组为代码平衡
@ -143,7 +143,7 @@ class AnalyzeByJSonPath(json: Any) {
val results = ArrayList<ArrayList<*>>() val results = ArrayList<ArrayList<*>>()
for (rl in rules) { for (rl in rules) {
val temp = getList(rl) val temp = getList(rl)
if (temp.isNotEmpty()) { if (!temp.isNullOrEmpty()) {
results.add(temp) results.add(temp)
if (temp.isNotEmpty() && ruleAnalyzes.elementsType == "||") { if (temp.isNotEmpty() && ruleAnalyzes.elementsType == "||") {
break break

View File

@ -42,8 +42,8 @@ class AnalyzeByJSoup(doc: Any) {
* 合并内容列表,得到内容 * 合并内容列表,得到内容
*/ */
internal fun getString(ruleStr: String) = internal fun getString(ruleStr: String) =
if (ruleStr.isEmpty()) "" if (ruleStr.isEmpty()) null
else getStringList(ruleStr).takeIf { it.isNotEmpty() }?.joinToString("\n") ?: "" else getStringList(ruleStr).takeIf { it.isNotEmpty() }?.joinToString("\n")
/** /**
* 获取一个字符串 * 获取一个字符串

View File

@ -6,11 +6,11 @@ import java.util.regex.Pattern
@Keep @Keep
object AnalyzeByRegex { object AnalyzeByRegex {
fun getElement(res: String, regs: Array<String>, index: Int = 0): List<String> { fun getElement(res: String, regs: Array<String>, index: Int = 0): List<String>? {
var vIndex = index var vIndex = index
val resM = Pattern.compile(regs[vIndex]).matcher(res) val resM = Pattern.compile(regs[vIndex]).matcher(res)
if (!resM.find()) { if (!resM.find()) {
return emptyList() return null
} }
// 判断索引的规则是最后一个规则 // 判断索引的规则是最后一个规则
return if (vIndex + 1 == regs.size) { return if (vIndex + 1 == regs.size) {

View File

@ -49,21 +49,21 @@ class AnalyzeByXPath(doc: Any) {
} }
} }
internal fun getElements(xPath: String): List<JXNode> { internal fun getElements(xPath: String): List<JXNode>? {
if (xPath.isEmpty()) return emptyList() if (xPath.isEmpty()) return null
val jxNodes = ArrayList<JXNode>() val jxNodes = ArrayList<JXNode>()
val ruleAnalyzes = RuleAnalyzer(xPath) val ruleAnalyzes = RuleAnalyzer(xPath)
val rules = ruleAnalyzes.splitRule("&&", "||", "%%") val rules = ruleAnalyzes.splitRule("&&", "||", "%%")
if (rules.size == 1) { if (rules.size == 1) {
return getResult(rules[0]) ?: emptyList() return getResult(rules[0])
} else { } else {
val results = ArrayList<List<JXNode>>() val results = ArrayList<List<JXNode>>()
for (rl in rules) { for (rl in rules) {
val temp = getElements(rl) val temp = getElements(rl)
if (temp.isNotEmpty()) { if (!temp.isNullOrEmpty()) {
results.add(temp) results.add(temp)
if (temp.isNotEmpty() && ruleAnalyzes.elementsType == "||") { if (temp.isNotEmpty() && ruleAnalyzes.elementsType == "||") {
break break
@ -130,19 +130,19 @@ class AnalyzeByXPath(doc: Any) {
return result return result
} }
fun getString(rule: String): String { fun getString(rule: String): String? {
val ruleAnalyzes = RuleAnalyzer(rule) val ruleAnalyzes = RuleAnalyzer(rule)
val rules = ruleAnalyzes.splitRule("&&", "||") val rules = ruleAnalyzes.splitRule("&&", "||")
if (rules.size == 1) { if (rules.size == 1) {
getResult(rule)?.let { getResult(rule)?.let {
return TextUtils.join("\n", it) return TextUtils.join("\n", it)
} }
return "" return null
} else { } else {
val textList = arrayListOf<String>() val textList = arrayListOf<String>()
for (rl in rules) { for (rl in rules) {
val temp = getString(rl) val temp = getString(rl)
if (temp.isNotEmpty()) { if (!temp.isNullOrEmpty()) {
textList.add(temp) textList.add(temp)
if (ruleAnalyzes.elementsType == "||") { if (ruleAnalyzes.elementsType == "||") {
break break

View File

@ -140,58 +140,58 @@ class AnalyzeRule(
mContent: Any? = null, mContent: Any? = null,
isUrl: Boolean = false isUrl: Boolean = false
): List<String>? { ): List<String>? {
var result: Any? = null
val content = mContent ?: this.content val content = mContent ?: this.content
if (content == null || ruleList.isEmpty()) { if (content != null && ruleList.isNotEmpty()) {
return null result = content
} if (result is NativeObject) {
var result: Any = content val sourceRule = ruleList.first()
if (result is NativeObject) {
val sourceRule = ruleList.first()
putRule(sourceRule.putMap)
sourceRule.makeUpRule(result)
result = if (sourceRule.getParamSize() > 1) {
// get {{}}
sourceRule.rule
} else {
// 键值直接访问
result[sourceRule.rule] ?: emptyList<String>()
}
if (sourceRule.replaceRegex.isNotEmpty() && result is List<*>) {
result = result.map {
replaceRegex(it.toString(), sourceRule)
}
} else if (sourceRule.replaceRegex.isNotEmpty()) {
result = replaceRegex(result.toString(), sourceRule)
}
} else {
for (sourceRule in ruleList) {
putRule(sourceRule.putMap) putRule(sourceRule.putMap)
sourceRule.makeUpRule(result) sourceRule.makeUpRule(result)
if (sourceRule.rule.isNotEmpty()) { result = if (sourceRule.getParamSize() > 1) {
result = when (sourceRule.mode) { // get {{}}
Mode.Js -> evalJS(sourceRule.rule, result) ?: "null" sourceRule.rule
Mode.Json -> getAnalyzeByJSonPath(result).getStringList(sourceRule.rule) } else {
Mode.XPath -> getAnalyzeByXPath(result).getStringList(sourceRule.rule) // 键值直接访问
Mode.Default -> getAnalyzeByJSoup(result).getStringList(sourceRule.rule) result[sourceRule.rule]
else -> sourceRule.rule }?.let {
} replaceRegex(it.toString(), sourceRule)
} }
if (sourceRule.replaceRegex.isNotEmpty() && result is List<*>) { } else {
result = result.map { for (sourceRule in ruleList) {
replaceRegex(it.toString(), sourceRule) putRule(sourceRule.putMap)
sourceRule.makeUpRule(result)
result?.let {
if (sourceRule.rule.isNotEmpty()) {
result = when (sourceRule.mode) {
Mode.Js -> evalJS(sourceRule.rule, result)
Mode.Json -> getAnalyzeByJSonPath(it).getStringList(sourceRule.rule)
Mode.XPath -> getAnalyzeByXPath(it).getStringList(sourceRule.rule)
Mode.Default -> getAnalyzeByJSoup(it).getStringList(sourceRule.rule)
else -> sourceRule.rule
}
}
if (sourceRule.replaceRegex.isNotEmpty() && result is List<*>) {
val newList = ArrayList<String>()
for (item in result as List<*>) {
newList.add(replaceRegex(item.toString(), sourceRule))
}
result = newList
} else if (sourceRule.replaceRegex.isNotEmpty()) {
result = replaceRegex(result.toString(), sourceRule)
}
} }
} else if (sourceRule.replaceRegex.isNotEmpty()) {
result = replaceRegex(result.toString(), sourceRule)
} }
} }
} }
if (result == null) return null
if (result is String) { if (result is String) {
result = result.split("\n") result = (result as String).split("\n")
} }
if (isUrl) { if (isUrl) {
val urlList = ArrayList<String>() val urlList = ArrayList<String>()
if (result is List<*>) { if (result is List<*>) {
for (url in result) { for (url in result as List<*>) {
val absoluteURL = NetworkUtils.getAbsoluteURL(redirectUrl, url.toString()) val absoluteURL = NetworkUtils.getAbsoluteURL(redirectUrl, url.toString())
if (absoluteURL.isNotEmpty() && !urlList.contains(absoluteURL)) { if (absoluteURL.isNotEmpty() && !urlList.contains(absoluteURL)) {
urlList.add(absoluteURL) urlList.add(absoluteURL)
@ -227,47 +227,50 @@ class AnalyzeRule(
isUrl: Boolean = false, isUrl: Boolean = false,
unescape: Boolean = true unescape: Boolean = true
): String { ): String {
var result: Any? = null
val content = mContent ?: this.content val content = mContent ?: this.content
if (content == null || ruleList.isEmpty()) { if (content != null && ruleList.isNotEmpty()) {
return if (isUrl) baseUrl ?: "" else "" result = content
} if (result is NativeObject) {
var result: Any = content val sourceRule = ruleList.first()
if (result is NativeObject) {
val sourceRule = ruleList.first()
putRule(sourceRule.putMap)
sourceRule.makeUpRule(result)
result = (if (sourceRule.getParamSize() > 1) {
// get {{}}
sourceRule.rule
} else {
// 键值直接访问
result[sourceRule.rule]?.toString() ?: ""
}).let {
replaceRegex(it, sourceRule)
}
} else {
for (sourceRule in ruleList) {
putRule(sourceRule.putMap) putRule(sourceRule.putMap)
sourceRule.makeUpRule(result) sourceRule.makeUpRule(result)
if (sourceRule.rule.isNotBlank() || sourceRule.replaceRegex.isEmpty()) { result = if (sourceRule.getParamSize() > 1) {
result = when (sourceRule.mode) { // get {{}}
Mode.Js -> evalJS(sourceRule.rule, result) ?: "" sourceRule.rule
Mode.Json -> getAnalyzeByJSonPath(result).getString(sourceRule.rule) } else {
Mode.XPath -> getAnalyzeByXPath(result).getString(sourceRule.rule) // 键值直接访问
Mode.Default -> if (isUrl) { result[sourceRule.rule]?.toString()
getAnalyzeByJSoup(result).getString0(sourceRule.rule) }?.let {
} else { replaceRegex(it, sourceRule)
getAnalyzeByJSoup(result).getString(sourceRule.rule)
}
else -> sourceRule.rule
}
} }
if (sourceRule.replaceRegex.isNotEmpty()) { } else {
result = replaceRegex(result.toString(), sourceRule) for (sourceRule in ruleList) {
putRule(sourceRule.putMap)
sourceRule.makeUpRule(result)
result?.let {
if (sourceRule.rule.isNotBlank() || sourceRule.replaceRegex.isEmpty()) {
result = when (sourceRule.mode) {
Mode.Js -> evalJS(sourceRule.rule, it)
Mode.Json -> getAnalyzeByJSonPath(it).getString(sourceRule.rule)
Mode.XPath -> getAnalyzeByXPath(it).getString(sourceRule.rule)
Mode.Default -> if (isUrl) {
getAnalyzeByJSoup(it).getString0(sourceRule.rule)
} else {
getAnalyzeByJSoup(it).getString(sourceRule.rule)
}
else -> sourceRule.rule
}
}
if ((result != null) && sourceRule.replaceRegex.isNotEmpty()) {
result = replaceRegex(result.toString(), sourceRule)
}
}
} }
} }
} }
if (result == null) result = ""
val str = if (unescape) { val str = if (unescape) {
StringEscapeUtils.unescapeHtml4(result.toString()) StringEscapeUtils.unescapeHtml4(result.toString())
} else result.toString() } else result.toString()
@ -286,28 +289,30 @@ class AnalyzeRule(
*/ */
fun getElement(ruleStr: String): Any? { fun getElement(ruleStr: String): Any? {
if (TextUtils.isEmpty(ruleStr)) return null if (TextUtils.isEmpty(ruleStr)) return null
val content = this.content ?: return null var result: Any? = null
val content = this.content
val ruleList = splitSourceRule(ruleStr, true) val ruleList = splitSourceRule(ruleStr, true)
if (ruleList.isEmpty()) { if (content != null && ruleList.isNotEmpty()) {
return null result = content
} for (sourceRule in ruleList) {
var result = content putRule(sourceRule.putMap)
for (sourceRule in ruleList) { sourceRule.makeUpRule(result)
putRule(sourceRule.putMap) result?.let {
sourceRule.makeUpRule(result) result = when (sourceRule.mode) {
result = when (sourceRule.mode) { Mode.Regex -> AnalyzeByRegex.getElement(
Mode.Regex -> AnalyzeByRegex.getElement( result.toString(),
result.toString(), sourceRule.rule.splitNotBlank("&&")
sourceRule.rule.splitNotBlank("&&") )
)
Mode.Js -> evalJS(sourceRule.rule, result) ?: Any() Mode.Js -> evalJS(sourceRule.rule, it)
Mode.Json -> getAnalyzeByJSonPath(result).getObject(sourceRule.rule) Mode.Json -> getAnalyzeByJSonPath(it).getObject(sourceRule.rule)
Mode.XPath -> getAnalyzeByXPath(result).getElements(sourceRule.rule) Mode.XPath -> getAnalyzeByXPath(it).getElements(sourceRule.rule)
else -> getAnalyzeByJSoup(result).getElements(sourceRule.rule) else -> getAnalyzeByJSoup(it).getElements(sourceRule.rule)
} }
if (sourceRule.replaceRegex.isNotEmpty()) { if (sourceRule.replaceRegex.isNotEmpty()) {
result = replaceRegex(result.toString(), sourceRule) result = replaceRegex(result.toString(), sourceRule)
}
}
} }
} }
return result return result
@ -318,30 +323,35 @@ class AnalyzeRule(
*/ */
@Suppress("UNCHECKED_CAST") @Suppress("UNCHECKED_CAST")
fun getElements(ruleStr: String): List<Any> { fun getElements(ruleStr: String): List<Any> {
val content = this.content ?: return emptyList() var result: Any? = null
val content = this.content
val ruleList = splitSourceRule(ruleStr, true) val ruleList = splitSourceRule(ruleStr, true)
if (ruleList.isEmpty()) { if (content != null && ruleList.isNotEmpty()) {
return emptyList() result = content
} for (sourceRule in ruleList) {
var result = content putRule(sourceRule.putMap)
for (sourceRule in ruleList) { result?.let {
putRule(sourceRule.putMap) result = when (sourceRule.mode) {
result = when (sourceRule.mode) { Mode.Regex -> AnalyzeByRegex.getElements(
Mode.Regex -> AnalyzeByRegex.getElements( result.toString(),
result.toString(), sourceRule.rule.splitNotBlank("&&")
sourceRule.rule.splitNotBlank("&&") )
)
Mode.Js -> evalJS(sourceRule.rule, result) ?: emptyList<Any>() Mode.Js -> evalJS(sourceRule.rule, result)
Mode.Json -> getAnalyzeByJSonPath(result).getList(sourceRule.rule) Mode.Json -> getAnalyzeByJSonPath(it).getList(sourceRule.rule)
Mode.XPath -> getAnalyzeByXPath(result).getElements(sourceRule.rule) Mode.XPath -> getAnalyzeByXPath(it).getElements(sourceRule.rule)
else -> getAnalyzeByJSoup(result).getElements(sourceRule.rule) else -> getAnalyzeByJSoup(it).getElements(sourceRule.rule)
} }
if (sourceRule.replaceRegex.isNotEmpty()) { if (sourceRule.replaceRegex.isNotEmpty()) {
result = replaceRegex(result.toString(), sourceRule) result = replaceRegex(result.toString(), sourceRule)
}
}
} }
} }
return result as List<Any> result?.let {
return it as List<Any>
}
return ArrayList()
} }
/** /**
@ -403,7 +413,7 @@ class AnalyzeRule(
/** /**
* getString 类规则缓存 * getString 类规则缓存
*/ */
fun splitSourceRuleCacheString(ruleStr: String?): List<SourceRule> { fun splitSourceRuleCacheString(ruleStr: String?) : List<SourceRule> {
if (ruleStr.isNullOrEmpty()) return emptyList() if (ruleStr.isNullOrEmpty()) return emptyList()
val cacheRule = stringRuleCache[ruleStr] val cacheRule = stringRuleCache[ruleStr]
return if (cacheRule != null) { return if (cacheRule != null) {