This commit is contained in:
Xwite 2023-03-11 19:00:58 +08:00
parent 88a99b016d
commit a8fe516332

View File

@ -142,7 +142,7 @@ class EpubFile(var book: Book) {
return "<img src=\"cover.jpeg\" />" return "<img src=\"cover.jpeg\" />"
} }
/*获取当前章节文本*/ /*获取当前章节文本*/
epubBook?.let { epubBook -> epubBook?.let {
val nextChapterFirstResourceHref = chapter.getVariable("nextUrl")?.substringBeforeLast("#") val nextChapterFirstResourceHref = chapter.getVariable("nextUrl")?.substringBeforeLast("#")
val currentChapterFirstResourceHref = chapter.url.substringBeforeLast("#") val currentChapterFirstResourceHref = chapter.url.substringBeforeLast("#")
val isLastChapter = nextChapterFirstResourceHref.isNullOrBlank() val isLastChapter = nextChapterFirstResourceHref.isNullOrBlank()
@ -201,40 +201,55 @@ class EpubFile(var book: Book) {
} }
/**
 * Parses an EPUB resource and returns its `<body>`, optionally cropped to the markup that
 * lies between two fragment anchors.
 *
 * `<script>` and `<style>` elements are removed from the body's children before cropping.
 * When the book has the [Book.hTag] delete flag set, `h1`..`h6` elements are removed from
 * the result as well.
 *
 * @param res resource whose bytes are decoded with [mCharset] and parsed as HTML.
 * @param startFragmentId id of the element that opens the wanted section; everything up to
 *   and including that element is dropped. Ignored when null, blank, or not found.
 * @param endFragmentId id of the element that opens the following section; that element and
 *   everything after it are dropped. Ignored when null, blank, equal to [startFragmentId],
 *   or not found.
 * @return the body element; a freshly parsed one if any cropping took place, otherwise the
 *   body of the originally parsed document.
 */
private fun getBody(res: Resource, startFragmentId: String?, endFragmentId: String?): Element {
    // Jsoup may repair non-conforming XHTML, so all string work is done on the markup Jsoup
    // serialises back out rather than on the raw resource text.
    val document = Jsoup.parse(String(res.data, mCharset))
    val parsedBody = document.body()
    parsedBody.children().run {
        select("script").remove()
        select("style").remove()
    }

    /*
     * In some XHTML files the chapter heading and the chapter content are neither in the
     * same node nor siblings:
     *
     *   <div>
     *     <a class="mulu1">Chapter 1</a>
     *   </div>
     *   <p>....</p>
     *   <div>
     *     <a class="mulu2">Chapter 2</a>
     *   </div>
     *   <p>....</p>
     *
     * So the elements carrying the fragment ids are located first and the HTML between
     * them is cut out as plain text.
     *
     * The cut relies on an anchor's outerHtml() being an exact substring of the body's
     * outerHtml(). With pretty-printing enabled Jsoup indents relative to the node being
     * serialised, so an anchor with block-level children would serialise differently on its
     * own than inside the body and the lookup would silently miss. Pretty-printing is
     * therefore switched off while the strings are produced and restored afterwards.
     */
    val outputSettings = document.outputSettings()
    val prettyPrintWasOn = outputSettings.prettyPrint()
    outputSettings.prettyPrint(false)
    val croppedHtml: String? = try {
        val fullBodyHtml = parsedBody.outerHtml()
        var section = fullBodyHtml
        if (!startFragmentId.isNullOrBlank()) {
            parsedBody.getElementById(startFragmentId)?.outerHtml()?.let { anchorHtml ->
                section = section.substringAfter(anchorHtml)
            }
        }
        if (!endFragmentId.isNullOrBlank() && endFragmentId != startFragmentId) {
            parsedBody.getElementById(endFragmentId)?.outerHtml()?.let { anchorHtml ->
                section = section.substringBefore(anchorHtml)
            }
        }
        // null signals "nothing was cut", so the already parsed body can be reused.
        section.takeIf { it != fullBodyHtml }
    } finally {
        outputSettings.prettyPrint(prettyPrintWasOn)
    }

    // Re-parse only when something was actually cut away.
    val bodyElement = if (croppedHtml != null) Jsoup.parse(croppedHtml).body() else parsedBody

    // Optionally strip heading tags from the text: in some books the heading duplicates the
    // chapter title shown by the reader (matching on the title text is still to be done).
    if (book.getDelTag(Book.hTag)) {
        bodyElement.select("h1, h2, h3, h4, h5, h6").remove()
    }
    return bodyElement
}
private fun getImage(href: String): InputStream? { private fun getImage(href: String): InputStream? {