-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: 正确解析小说中的嵌套标签
- Loading branch information
Showing
10 changed files
with
297 additions
and
67 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
package top.kagg886.pixko.anno | ||
|
||
/**
 * Opt-in marker for the novel parser API.
 *
 * The opt-in message (in Chinese) says that this kind of API has not been tested at
 * scale and may change in the future. The level is [RequiresOptIn.Level.ERROR], so
 * callers that do not opt in get a compilation error rather than a warning.
 */
@RequiresOptIn(
    message = "此类api未经过大规模测试,在未来可能会被修改",
    level = RequiresOptIn.Level.ERROR
)
annotation class ExperimentalNovelParserAPI
23 changes: 0 additions & 23 deletions
23
src/main/kotlin/top/kagg886/pixko/module/novel/parser/data.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
47 changes: 47 additions & 0 deletions
47
src/main/kotlin/top/kagg886/pixko/module/novel/parser/v2/data.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
package top.kagg886.pixko.module.novel.parser.v2 | ||
|
||
/**
 * One element of a parsed novel. Every node carries the [position] it was given
 * when it was created.
 */
sealed interface NovelNode {
    /** Index range attached to this node (the parser fills it from the source string). */
    val position: IntRange
}

/** A run of text, possibly mixing plain and annotated fragments. */
data class TextNode(
    val text: CombinedText,
    override val position: IntRange,
) : NovelNode

/** A link: display [text] plus the target [uri] (the `jumpuri` tag). */
data class JumpUriNode(
    val text: String,
    val uri: String,
    override val position: IntRange,
) : NovelNode

/** An uploaded image referenced by [url] (the `uploadedimage` tag). */
data class UploadImageNode(
    val url: String,
    override val position: IntRange,
) : NovelNode

/**
 * An image referenced by illustration [id] (the `pixivimage` tag).
 *
 * [index] defaults to 0; the parser stores the page number from the tag minus one.
 */
data class PixivImageNode(
    val id: Int,
    val index: Int = 0,
    override val position: IntRange,
) : NovelNode

/** A page break (the `newpage` tag). */
data class NewPageNode(
    override val position: IntRange,
) : NovelNode

/** A chapter title (the `chapter` tag); the title may contain annotated fragments. */
data class TitleNode(
    val text: CombinedText,
    override val position: IntRange,
) : NovelNode

/** A jump to another [page] (the `jump` tag). */
data class JumpPageNode(
    val page: Int,
    override val position: IntRange,
) : NovelNode
|
||
|
||
/**
 * `true` when the node is a [JumpUriNode], a [TextNode] or a [JumpPageNode];
 * `false` for every other node type.
 */
val NovelNode.isBlocking: Boolean
    get() = when (this) {
        is JumpUriNode, is TextNode, is JumpPageNode -> true
        else -> false
    }
|
||
|
||
/**
 * An ordered list of [CombinedTextNode] fragments that together form one piece of text.
 *
 * All [List] operations are delegated to the list passed to the constructor.
 */
class CombinedText internal constructor(nodes: List<CombinedTextNode>) : List<CombinedTextNode> by nodes {
    /**
     * Concatenates the fragments in order: plain fragments as-is, annotated fragments
     * as `text^{notation}`.
     *
     * The separator is explicitly empty. The default `joinToString` separator (", ")
     * would insert characters between fragments that are not part of the text.
     */
    override fun toString(): String = joinToString(separator = "") { node ->
        when (node) {
            is NotatedText -> "${node.text}^{${node.notation}}"
            is PlainText -> node.text
        }
    }
}

/** Wraps this list of fragments in a [CombinedText]. */
fun List<CombinedTextNode>.asCombinedText(): CombinedText = CombinedText(this)

/** A fragment of a [CombinedText]: either [PlainText] or [NotatedText]. */
sealed interface CombinedTextNode {
    /** The fragment's own text (without any annotation). */
    val text: String

    /** Returns a [CombinedText] containing only this fragment. */
    fun asSingle(): CombinedText = CombinedText(listOf(this))
}

/** A fragment without annotation. */
data class PlainText(override val text: String) : CombinedTextNode

/** A fragment of [text] carrying an attached [notation]. */
data class NotatedText(override val text: String, val notation: String) : CombinedTextNode

/** Wraps this string in a [PlainText] fragment. */
fun String.toPlainText(): PlainText = PlainText(this)

/** Wraps this string in a [NotatedText] fragment annotated with [notation]. */
fun String.toNotatedText(notation: String): NotatedText = NotatedText(this, notation)
124 changes: 124 additions & 0 deletions
124
src/main/kotlin/top/kagg886/pixko/module/novel/parser/v2/parse.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
package top.kagg886.pixko.module.novel.parser.v2 | ||
|
||
import top.kagg886.pixko.anno.ExperimentalNovelParserAPI | ||
|
||
// May be used in the future:
// private val JUMP_URI_REGEX = "\\[\\[jumpuri:(.*)>(.*)]]".toRegex()

/**
 * Matches one annotation tag of the form `[[rb:A>B]]`, capturing `A` (group 1) and
 * `B` (group 2).
 *
 * Both groups are lazy. A greedy `.*` would merge several tags in one input into a
 * single match and would split at the last `>` rather than the first; the lazy form
 * splits at the first `>`, like `split(">", limit = 2)`.
 */
private val NOTATION_REGEX = "\\[\\[rb:(.*?)>(.*?)]]".toRegex()

/**
 * Matches a tag wrapped in one or two square brackets. Group 1 is the tag name
 * (`\w+`); group 2 is the optional value after `:`, which cannot contain `[` or `]`.
 */
private val TAG_REGEX = """\[{1,2}(\w+)(?::([^\[\]]+))?]{1,2}""".toRegex()
|
||
/**
 * Splits [rawValue] at the first `>` into exactly two parts.
 *
 * @throws IllegalStateException if [rawValue] contains no `>`.
 */
private fun splitOnSeparator(rawValue: String): List<String> {
    val parts = rawValue.split(">", limit = 2)
    check(parts.size == 2) { "Cannot find separator(>)" }
    return parts
}

/**
 * Breaks a chapter value into fragments: every [NOTATION_REGEX] match becomes a
 * [NotatedText], and the text between and around the matches becomes [PlainText].
 */
private fun parseChapterText(rawValue: String): CombinedText {
    val fragments = mutableListOf<CombinedTextNode>()
    var cursor = 0
    for (match in NOTATION_REGEX.findAll(rawValue)) {
        val span = match.range
        // Plain text sitting between the previous match and this one.
        if (span.first > cursor) {
            fragments += rawValue.substring(cursor, span.first).toPlainText()
        }
        val (notation, text) = match.destructured
        fragments += text.toNotatedText(notation)
        cursor = span.last + 1
    }
    // Whatever follows the last match (or the whole value when nothing matched).
    if (cursor < rawValue.length) {
        fragments += rawValue.substring(cursor).toPlainText()
    }
    return fragments.asCombinedText()
}

/**
 * Builds the [NovelNode] for one tag.
 *
 * @param name tag name, e.g. `jump`, `newpage`, `uploadedimage`, `pixivimage`,
 *   `jumpuri`, `rb` or `chapter`.
 * @param rawValue the text after the tag's `:` (empty when the tag has no value).
 * @param position range stored on the resulting node.
 * @throws NumberFormatException if a `jump` or `pixivimage` value is not numeric.
 * @throws IllegalStateException if the tag name is unknown, a `jumpuri`/`rb` value
 *   has no `>` separator, or a `jumpuri` target does not start with `http`.
 */
private fun tagToNode(
    name: String,
    rawValue: String,
    position: IntRange,
): NovelNode {
    return when (name) {
        "jump" -> JumpPageNode(rawValue.toInt(), position)

        "newpage" -> NewPageNode(position)

        "uploadedimage" -> UploadImageNode(rawValue, position)

        "pixivimage" -> {
            // "<id>" or "<id>-<page>"; the page in the tag is 1-based, the stored index 0-based.
            val numbers = rawValue.split("-").map { it.toInt() }
            val pageIndex = if (numbers.size > 1) numbers[1] - 1 else 0
            PixivImageNode(numbers[0], pageIndex, position)
        }

        "jumpuri" -> {
            val (text, url) = splitOnSeparator(rawValue)
            check(url.startsWith("http")) {
                "Invalid url:$url"
            }
            JumpUriNode(text, url, position)
        }

        "rb" -> {
            val (notation, text) = splitOnSeparator(rawValue)
            TextNode(text.toNotatedText(notation).asSingle(), position)
        }

        "chapter" -> TitleNode(parseChapterText(rawValue), position)

        else -> error("Unknown tag name:$name")
    }
}
|
||
/**
 * Parses raw novel text into a flat list of [NovelNode]s.
 *
 * Every [TAG_REGEX] match is converted with [tagToNode]; the text before, between and
 * after the matches becomes plain [TextNode]s. A tag that [tagToNode] rejects (unknown
 * name or malformed value) is kept verbatim as a plain [TextNode].
 *
 * Every node's `position` is an end-inclusive index range into [str].
 *
 * @param str the raw novel text.
 * @return the nodes in source order.
 */
@ExperimentalNovelParserAPI
fun createNovelDataV2(str: String): List<NovelNode> {
    val nodes = mutableListOf<NovelNode>()
    var lastIndex = 0

    TAG_REGEX.findAll(str).forEach { result ->
        val position = result.range

        if (position.first > lastIndex) {
            val plainText = str.substring(lastIndex, position.first).toPlainText().asSingle()
            // IntRange is end-inclusive, so stop one before the tag's first index;
            // otherwise the text range overlaps the tag that follows it.
            nodes.add(TextNode(plainText, lastIndex until position.first))
        }

        val (name, rawValue) = result.destructured
        val node = try {
            tagToNode(name, rawValue, position)
        } catch (_: Exception) {
            // Unknown tag or malformed value: keep the matched text as plain text.
            TextNode(result.value.toPlainText().asSingle(), position)
        }
        nodes.add(node)

        lastIndex = position.last + 1
    }

    if (lastIndex < str.length) {
        val plainText = str.substring(lastIndex).toPlainText().asSingle()
        // End at the last valid index (str.length - 1), not one past it.
        nodes.add(TextNode(plainText, lastIndex until str.length))
    }

    return nodes
}
|
||
/**
 * Renders the nodes back into tag markup and concatenates the results in list order.
 *
 * Node `position` values are not used. A [PixivImageNode] with index 0 is written
 * without a page suffix; any other index is written as `-<index + 1>`.
 */
@ExperimentalNovelParserAPI
fun List<NovelNode>.toOriginalString(): String {
    // Markup for a CombinedText: annotated fragments become [[rb:notation>text]].
    fun markup(text: CombinedText): String {
        val out = StringBuilder()
        for (fragment in text) {
            out.append(
                when (fragment) {
                    is NotatedText -> "[[rb:${fragment.notation}>${fragment.text}]]"
                    is PlainText -> fragment.text
                }
            )
        }
        return out.toString()
    }

    return joinToString(separator = "") { node ->
        when (node) {
            is TextNode -> markup(node.text)
            is JumpUriNode -> "[[jumpuri:${node.text}>${node.uri}]]"
            is UploadImageNode -> "[uploadedimage:${node.url}]"
            is PixivImageNode -> {
                val pageSuffix = if (node.index != 0) "-${node.index + 1}" else ""
                "[pixivimage:${node.id}$pageSuffix]"
            }
            is NewPageNode -> "[newpage]"
            is TitleNode -> "[chapter:${markup(node.text)}]"
            is JumpPageNode -> "[jump:${node.page}]"
        }
    }
}
10 changes: 10 additions & 0 deletions
10
src/main/kotlin/top/kagg886/pixko/module/novel/parser/v2/utils.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
package top.kagg886.pixko.module.novel.parser.v2 | ||
|
||
import top.kagg886.pixko.anno.ExperimentalNovelParserAPI | ||
import top.kagg886.pixko.module.novel.NovelData | ||
|
||
/**
 * A [Lazy] that parses this novel's `text` with [createNovelDataV2] when its value is
 * first read.
 *
 * NOTE(review): the getter builds a new [Lazy] on every access, so the parsed result
 * is only cached by a caller that keeps the returned instance. Confirm this is intended.
 */
@ExperimentalNovelParserAPI
val NovelData.content: Lazy<List<NovelNode>>
    get() {
        val novel = this
        return lazy { createNovelDataV2(novel.text) }
    }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.