Skip to content

Commit

Permalink
优化
Browse files Browse the repository at this point in the history
  • Loading branch information
821938089 committed Dec 28, 2023
1 parent 830d6da commit 2f29c11
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 18 deletions.
23 changes: 20 additions & 3 deletions app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,32 @@ import androidx.annotation.Keep
import com.script.SimpleBindings
import com.script.rhino.RhinoScriptEngine
import io.legado.app.constant.AppPattern.JS_PATTERN
import io.legado.app.data.entities.*
import io.legado.app.data.entities.BaseBook
import io.legado.app.data.entities.BaseSource
import io.legado.app.data.entities.Book
import io.legado.app.data.entities.BookChapter
import io.legado.app.data.entities.BookSource
import io.legado.app.help.CacheManager
import io.legado.app.help.JsExtensions
import io.legado.app.help.http.CookieStore
import io.legado.app.model.webBook.WebBook
import io.legado.app.utils.*
import io.legado.app.utils.GSON
import io.legado.app.utils.NetworkUtils
import io.legado.app.utils.fromJsonObject
import io.legado.app.utils.isJson
import io.legado.app.utils.printOnDebug
import io.legado.app.utils.splitNotBlank
import io.legado.app.utils.stackTraceStr
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.withTimeout
import org.apache.commons.text.StringEscapeUtils
import org.jsoup.nodes.Node
import org.mozilla.javascript.NativeObject
import java.net.URL
import java.util.regex.Pattern
import kotlin.collections.component1
import kotlin.collections.component2
import kotlin.collections.set

/**
* 解析规则获取结果
Expand Down Expand Up @@ -55,7 +69,10 @@ class AnalyzeRule(
fun setContent(content: Any?, baseUrl: String? = null): AnalyzeRule {
if (content == null) throw AssertionError("内容不可空(Content cannot be null)")
this.content = content
isJSON = content.toString().isJson()
isJSON = when (content) {
is Node -> false
else -> content.toString().isJson()
}
setBaseUrl(baseUrl)
objectChangedXP = true
objectChangedJS = true
Expand Down
45 changes: 45 additions & 0 deletions app/src/main/java/io/legado/app/utils/ByteArrayExtensions.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package io.legado.app.utils


/**
 * Searches this byte array for the first occurrence of [pattern] using the
 * Knuth-Morris-Pratt algorithm (the pattern's failure table lets the scan
 * resume after a mismatch without re-reading bytes of the data).
 *
 * The search range is clamped to the array: a negative [start] is treated as 0
 * and a [stop] larger than [size] is treated as [size], so out-of-range
 * arguments no longer cause an index-out-of-bounds failure.
 *
 * @param pattern the byte sequence to look for. An empty pattern matches at the
 * (clamped) start index, mirroring `String.indexOf("")`.
 * @param start index of the first byte to examine (inclusive).
 * @param stop index at which to stop examining bytes (exclusive); a match must
 * lie entirely before this index.
 * @return the index of the first byte of the first match, or -1 if [pattern]
 * does not occur inside the range.
 */
fun ByteArray.indexOf(pattern: ByteArray, start: Int = 0, stop: Int = size): Int {
    val from = start.coerceAtLeast(0)
    val to = stop.coerceAtMost(size)

    // An empty pattern has no byte to compare; indexing pattern[0] would throw.
    if (pattern.isEmpty()) {
        return if (from <= to) from else -1
    }

    val failure = computeFailure(pattern)
    // Number of pattern bytes currently matched against the data.
    var matched = 0

    for (i in from until to) {
        // On a mismatch fall back to the longest shorter prefix that still matches.
        while (matched > 0 && pattern[matched] != this[i]) {
            matched = failure[matched - 1]
        }
        if (pattern[matched] == this[i]) {
            matched++
        }
        if (matched == pattern.size) {
            return i - pattern.size + 1
        }
    }
    return -1
}

/**
 * Computes the failure function using a boot-strapping process,
 * where the pattern is matched against itself.
 *
 * Entry `i` of the result is the length of the longest proper prefix of
 * `pattern[0..i]` that is also a suffix of it. An empty pattern yields an
 * empty table.
 */
private fun computeFailure(pattern: ByteArray): IntArray {
    val failure = IntArray(pattern.size)
    var j = 0
    for (i in 1 until pattern.size) {
        while (j > 0 && pattern[j] != pattern[i]) {
            j = failure[j - 1]
        }
        if (pattern[j] == pattern[i]) {
            j++
        }
        failure[i] = j
    }
    return failure
}
32 changes: 20 additions & 12 deletions app/src/main/java/io/legado/app/utils/EncodingDetect.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,43 @@ import io.legado.app.lib.icu4j.CharsetDetector
import org.jsoup.Jsoup
import java.io.File
import java.io.FileInputStream
import java.nio.charset.StandardCharsets
import java.util.*

/**
* 自动获取文件的编码
* */
@Suppress("MemberVisibilityCanBePrivate", "unused")
object EncodingDetect {

fun getHtmlEncode(bytes: ByteArray): String? {
private val headTagRegex = "(?i)<head>[\\s\\S]*?</head>".toRegex()
private val headOpenBytes = "<head>".toByteArray()
private val headCloseBytes = "</head>".toByteArray()

fun getHtmlEncode(bytes: ByteArray): String {
try {
val doc = Jsoup.parse(String(bytes, StandardCharsets.UTF_8))
var head: String? = null
val startIndex = bytes.indexOf(headOpenBytes)
if (startIndex > -1) {
val endIndex = bytes.indexOf(headCloseBytes, startIndex)
if (endIndex > -1) {
head = String(bytes.copyOfRange(startIndex, endIndex + headCloseBytes.size))
}
}
val doc = Jsoup.parseBodyFragment(head ?: headTagRegex.find(String(bytes))!!.value)
val metaTags = doc.getElementsByTag("meta")
var charsetStr: String
for (metaTag in metaTags) {
charsetStr = metaTag.attr("charset")
if (!TextUtils.isEmpty(charsetStr)) {
return charsetStr
}
val content = metaTag.attr("content")
val httpEquiv = metaTag.attr("http-equiv")
if (httpEquiv.lowercase(Locale.getDefault()) == "content-type") {
charsetStr = if (content.lowercase(Locale.getDefault()).contains("charset")) {
content.substring(
content.lowercase(Locale.getDefault())
.indexOf("charset") + "charset=".length
)
if (httpEquiv.equals("content-type", true)) {
val content = metaTag.attr("content")
val idx = content.indexOf("charset=", ignoreCase = true)
charsetStr = if (idx > -1) {
content.substring(idx + "charset=".length)
} else {
content.substring(content.lowercase(Locale.getDefault()).indexOf(";") + 1)
content.substringAfter(";")
}
if (!TextUtils.isEmpty(charsetStr)) {
return charsetStr
Expand Down
9 changes: 6 additions & 3 deletions app/src/main/java/io/legado/app/utils/HtmlFormatter.kt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ object HtmlFormatter {
"<img[^>]*\\ssrc\\s*=\\s*\"([^\"{>]*\\{(?:[^{}]|\\{[^}>]+\\})+\\})\"[^>]*>|<img[^>]*\\sdata-[^=>]*=\\s*\"([^\">]*)\"[^>]*>|<img[^>]*\\ssrc\\s*=\\s*\"([^\">]*)\"[^>]*>",
Pattern.CASE_INSENSITIVE
)
private val indent1Regex = "\\s*\\n+\\s*".toRegex()
private val indent2Regex = "^[\\n\\s]+".toRegex()
private val lastRegex = "[\\n\\s]+$".toRegex()

fun format(html: String?, otherRegex: Regex = otherHtmlRegex): String {
html ?: return ""
Expand All @@ -26,9 +29,9 @@ object HtmlFormatter {
.replace(wrapHtmlRegex, "\n")
.replace(commentRegex, "")
.replace(otherRegex, "")
.replace("\\s*\\n+\\s*".toRegex(), "\n  ")
.replace("^[\\n\\s]+".toRegex(), "  ")
.replace("[\\n\\s]+$".toRegex(), "")
.replace(indent1Regex, "\n  ")
.replace(indent2Regex, "  ")
.replace(lastRegex, "")
}

fun formatKeepImg(html: String?, redirectUrl: URL? = null): String {
Expand Down

0 comments on commit 2f29c11

Please sign in to comment.