This commit is contained in:
gedoor 2022-01-06 12:35:58 +08:00
parent fd3c9aa3f8
commit 558dfc5880

View File

@ -32,7 +32,6 @@ class TextFile(private val book: Book) {
rulePattern = if (book.tocUrl.isNotEmpty()) { rulePattern = if (book.tocUrl.isNotEmpty()) {
Pattern.compile(book.tocUrl, Pattern.MULTILINE) Pattern.compile(book.tocUrl, Pattern.MULTILINE)
} else { } else {
tocRules.addAll(getTocRules())
if (blockContent.isEmpty()) { if (blockContent.isEmpty()) {
bis.read(buffer) bis.read(buffer)
book.charset = EncodingDetect.getEncode(buffer) book.charset = EncodingDetect.getEncode(buffer)
@ -50,18 +49,8 @@ class TextFile(private val book: Book) {
private fun analyze(pattern: Pattern?): ArrayList<BookChapter> { private fun analyze(pattern: Pattern?): ArrayList<BookChapter> {
val toc = arrayListOf<BookChapter>() val toc = arrayListOf<BookChapter>()
LocalBook.getBookInputStream(book).use { bis -> LocalBook.getBookInputStream(book).use { bis ->
var tocRule: TxtTocRule? = null
val buffer = ByteArray(BUFFER_SIZE) val buffer = ByteArray(BUFFER_SIZE)
var blockContent: String var blockContent: String
val rulePattern = pattern ?: let {
val length = bis.read(buffer)
bis.skip(-length.toLong())
blockContent = String(buffer, charset)
tocRule = getTocRule(blockContent)
tocRule?.let {
Pattern.compile(it.rule, Pattern.MULTILINE)
}
}
//加载章节 //加载章节
var curOffset: Long = 0 var curOffset: Long = 0
//block的个数 //block的个数
@ -72,7 +61,7 @@ class TextFile(private val book: Book) {
while (bis.read(buffer).also { length = it } > 0) { while (bis.read(buffer).also { length = it } > 0) {
blockPos++ blockPos++
//如果存在Chapter //如果存在Chapter
if (rulePattern != null) { if (pattern != null) {
//将数据转换成String, 不能超过length //将数据转换成String, 不能超过length
blockContent = String(buffer, 0, length, charset) blockContent = String(buffer, 0, length, charset)
val lastN = blockContent.lastIndexOf("\n") val lastN = blockContent.lastIndexOf("\n")
@ -85,7 +74,7 @@ class TextFile(private val book: Book) {
//当前Block下使过的String的指针 //当前Block下使过的String的指针
var seekPos = 0 var seekPos = 0
//进行正则匹配 //进行正则匹配
val matcher: Matcher = rulePattern.matcher(blockContent) val matcher: Matcher = pattern.matcher(blockContent)
//如果存在相应章节 //如果存在相应章节
while (matcher.find()) { //获取匹配到的字符在字符串中的起始位置 while (matcher.find()) { //获取匹配到的字符在字符串中的起始位置
val chapterStart = matcher.start() val chapterStart = matcher.start()
@ -93,11 +82,11 @@ class TextFile(private val book: Book) {
val chapterContent = blockContent.substring(seekPos, chapterStart) val chapterContent = blockContent.substring(seekPos, chapterStart)
val chapterLength = chapterContent.toByteArray(charset).size val chapterLength = chapterContent.toByteArray(charset).size
val lastStart = toc.lastOrNull()?.start ?: 0 val lastStart = toc.lastOrNull()?.start ?: 0
if (curOffset + chapterLength - lastStart > 50000 && pattern == null) { if (curOffset + chapterLength - lastStart > 50000) {
//移除不匹配的规则
tocRules.remove(tocRule)
bis.close() bis.close()
return analyze(null) //移除不匹配的规则
tocRules.removeFirstOrNull()
return analyze(tocRules.firstOrNull()?.rule?.toPattern(Pattern.MULTILINE))
} }
//如果 seekPos == 0 && nextChapterPos != 0 表示当前block处前面有一段内容 //如果 seekPos == 0 && nextChapterPos != 0 表示当前block处前面有一段内容
//第一种情况一定是序章 第二种情况是上一个章节的内容 //第一种情况一定是序章 第二种情况是上一个章节的内容
@ -150,11 +139,11 @@ class TextFile(private val book: Book) {
//设置指针偏移 //设置指针偏移
seekPos += chapterContent.length seekPos += chapterContent.length
} }
if (seekPos == 0 && length > 50000 && pattern == null) { if (seekPos == 0 && length > 50000) {
//移除不匹配的规则
tocRules.remove(tocRule)
bis.close() bis.close()
return analyze(null) //移除不匹配的规则
tocRules.remove(tocRules.removeFirstOrNull())
return analyze(tocRules.firstOrNull()?.rule?.toPattern(Pattern.MULTILINE))
} }
} else { //进行本地虚拟分章 } else { //进行本地虚拟分章
//章节在buffer的偏移量 //章节在buffer的偏移量
@ -198,7 +187,7 @@ class TextFile(private val book: Book) {
//block的偏移点 //block的偏移点
curOffset += length.toLong() curOffset += length.toLong()
if (rulePattern != null) { if (pattern != null) {
//设置上一章的结尾 //设置上一章的结尾
val lastChapter = toc.last() val lastChapter = toc.last()
lastChapter.end = curOffset lastChapter.end = curOffset
@ -210,9 +199,7 @@ class TextFile(private val book: Book) {
System.runFinalization() System.runFinalization()
} }
} }
tocRule?.let { book.tocUrl = pattern?.pattern() ?: ""
book.tocUrl = it.rule
}
} }
for (i in toc.indices) { for (i in toc.indices) {
val bean = toc[i] val bean = toc[i]
@ -233,24 +220,25 @@ class TextFile(private val book: Book) {
* 获取匹配次数最多的目录规则 * 获取匹配次数最多的目录规则
*/ */
private fun getTocRule(content: String): TxtTocRule? { private fun getTocRule(content: String): TxtTocRule? {
tocRules.clear()
val rules = getTocRules().reversed()
var txtTocRule: TxtTocRule? = null var txtTocRule: TxtTocRule? = null
var maxCs = 0 var maxCs = 0
val removeRules = hashSetOf<TxtTocRule>() for (tocRule in rules) {
tocRules.forEach { tocRule ->
val pattern = Pattern.compile(tocRule.rule, Pattern.MULTILINE) val pattern = Pattern.compile(tocRule.rule, Pattern.MULTILINE)
val matcher = pattern.matcher(content) val matcher = pattern.matcher(content)
var cs = 0 var cs = 0
while (matcher.find()) { while (matcher.find()) {
cs++ cs++
} }
if (cs == 0) { if (cs >= maxCs) {
removeRules.add(tocRule) tocRules.add(0, tocRule)
} else if (cs > maxCs) {
maxCs = cs maxCs = cs
txtTocRule = tocRule txtTocRule = tocRule
} else if (cs > 0) {
tocRules.add(tocRule)
} }
} }
tocRules.removeAll(removeRules)
return txtTocRule return txtTocRule
} }