diff --git a/app/src/main/java/io/legado/app/model/localBook/EpubFile.kt b/app/src/main/java/io/legado/app/model/localBook/EpubFile.kt
index b617bcab2..6a4049345 100644
--- a/app/src/main/java/io/legado/app/model/localBook/EpubFile.kt
+++ b/app/src/main/java/io/legado/app/model/localBook/EpubFile.kt
@@ -142,7 +142,7 @@ class EpubFile(var book: Book) {
return ""
}
/*获取当前章节文本*/
- epubBook?.let { epubBook ->
+ epubBook?.let {
val nextChapterFirstResourceHref = chapter.getVariable("nextUrl")?.substringBeforeLast("#")
val currentChapterFirstResourceHref = chapter.url.substringBeforeLast("#")
val isLastChapter = nextChapterFirstResourceHref.isNullOrBlank()
@@ -201,40 +201,55 @@ class EpubFile(var book: Book) {
}
private fun getBody(res: Resource, startFragmentId: String?, endFragmentId: String?): Element {
- val originHtml = String(res.data, mCharset)
- var html = originHtml
- var doc = Jsoup.parse(html)
- var body = doc.body()
+ // Jsoup可能会修复不规范的xhtml文件 解析处理后再获取
+ var bodyElement = Jsoup.parse(String(res.data, mCharset)).body()
+ bodyElement.children().run {
+ select("script").remove()
+ select("style").remove()
+ }
+ // 获取body对应的文本
+ var bodyString = bodyElement.outerHtml()
+ val originBodyString = bodyString
+ /**
+ * 某些xhtml文件 章节标题和内容不在一个节点或者不是兄弟节点
+ *
....
+ * 先找到FragmentId对应的Element 然后直接截取之间的html
+ */
if (!startFragmentId.isNullOrBlank()) {
- body.getElementById(startFragmentId)?.outerHtml()?.let {
- html = html.substringAfter(it)
+ bodyElement.getElementById(startFragmentId)?.outerHtml()?.let {
+ bodyString = bodyString.substringAfter(it)
}
}
if (!endFragmentId.isNullOrBlank() && endFragmentId != startFragmentId) {
- body.getElementById(endFragmentId)?.outerHtml()?.let {
- html = html.substringBefore(it)
+ bodyElement.getElementById(endFragmentId)?.outerHtml()?.let {
+ bodyString = bodyString.substringBefore(it)
}
}
- if (html != originHtml) {
- doc = Jsoup.parse(html)
- body = doc.body()
+ //截取过再重新解析
+ if (bodyString != originBodyString) {
+ bodyElement = Jsoup.parse(bodyString).body()
}
/*选择去除正文中的H标签,部分书籍标题与阅读标题重复待优化*/
val tag = Book.hTag
if (book.getDelTag(tag)) {
- body.getElementsByTag("h1").remove()
- body.getElementsByTag("h2").remove()
- body.getElementsByTag("h3").remove()
- body.getElementsByTag("h4").remove()
- body.getElementsByTag("h5").remove()
- body.getElementsByTag("h6").remove()
- //body.getElementsMatchingOwnText(chapter.title)?.remove()
+ bodyElement.run {
+ getElementsByTag("h1").remove()
+ getElementsByTag("h2").remove()
+ getElementsByTag("h3").remove()
+ getElementsByTag("h4").remove()
+ getElementsByTag("h5").remove()
+ getElementsByTag("h6").remove()
+ //getElementsMatchingOwnText(chapter.title)?.remove()
+ }
}
-
- val children = body.children()
- children.select("script").remove()
- children.select("style").remove()
- return doc.body()
+ return bodyElement
}
private fun getImage(href: String): InputStream? {