Skip to content

Commit

Permalink
合并拉取请求 #3
Browse files Browse the repository at this point in the history
feat: 正确解析小说中的嵌套标签
  • Loading branch information
kagg886 authored Jan 24, 2025
2 parents 9983597 + 2a8cb01 commit dcf7a3b
Show file tree
Hide file tree
Showing 10 changed files with 297 additions and 67 deletions.
4 changes: 4 additions & 0 deletions src/main/kotlin/top/kagg886/pixko/anno/experimental.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
package top.kagg886.pixko.anno

/**
 * Opt-in marker for the novel parser API.
 *
 * The marker is declared with [RequiresOptIn.Level.ERROR], so using an annotated
 * declaration without opting in is reported as a compilation error.
 */
@RequiresOptIn(
    message = "此类api未经过大规模测试,在未来可能会被修改",
    level = RequiresOptIn.Level.ERROR
)
annotation class ExperimentalNovelParserAPI
23 changes: 0 additions & 23 deletions src/main/kotlin/top/kagg886/pixko/module/novel/parser/data.kt
Original file line number Diff line number Diff line change
@@ -1,28 +1,5 @@
package top.kagg886.pixko.module.novel.parser

//@Deprecated("")
//enum class NovelContentBlockType(val blocking: Boolean, val double: Boolean = true) {
// PLAIN(false, false),
// JUMP_URI(false, false), //[[jumpuri:详情>链接]]
// NOTATION(blocking = false, false),//[[rb:文本>注音]]
//
// UPLOAD_IMAGE(blocking = true), //[[uploadedimage:id]]
// PIXIV_IMAGE(blocking = true),//[[pixivimage:id]]
//
// NEW_PAGE(blocking = true), //[newpage]
// TITLE(blocking = true), //[chapter:标题]
// JUMP_PAGE(blocking = true),//[jump:页码]
//}
//
//@Deprecated("")
//data class TextNode(
// val novelContentBlockType: NovelContentBlockType,
// val value: String? = null,
// val metadata: String? = null,
// val position: IntRange
//)


sealed interface NovelNode {
val blocking: Boolean
val position: IntRange
Expand Down
47 changes: 47 additions & 0 deletions src/main/kotlin/top/kagg886/pixko/module/novel/parser/v2/data.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package top.kagg886.pixko.module.novel.parser.v2

/**
 * One parsed element of a novel's text.
 */
sealed interface NovelNode {
    /** Index range of the parsed string that this node was produced from. */
    val position: IntRange
}

/** A run of text, held as a [CombinedText]. */
data class TextNode(
    val text: CombinedText,
    override val position: IntRange
) : NovelNode

/** A `jumpuri` tag: a label ([text]) and the link target ([uri]). */
data class JumpUriNode(
    val text: String,
    val uri: String,
    override val position: IntRange
) : NovelNode

/** An `uploadedimage` tag; [url] holds the tag's raw value. */
data class UploadImageNode(
    val url: String,
    override val position: IntRange
) : NovelNode

/** A `pixivimage` tag: the numeric [id] plus a zero-based page [index] (defaults to 0). */
data class PixivImageNode(
    val id: Int,
    val index: Int = 0,
    override val position: IntRange
) : NovelNode

/** A `newpage` tag. */
data class NewPageNode(
    override val position: IntRange
) : NovelNode

/** A `chapter` tag; [text] is the title, held as a [CombinedText]. */
data class TitleNode(
    val text: CombinedText,
    override val position: IntRange
) : NovelNode

/** A `jump` tag pointing at page number [page]. */
data class JumpPageNode(
    val page: Int,
    override val position: IntRange
) : NovelNode


/**
 * `true` for [JumpUriNode], [TextNode] and [JumpPageNode]; `false` for every other node type.
 *
 * NOTE(review): what "blocking" means to callers is not visible here — confirm that this
 * is the intended set of node types.
 */
val NovelNode.isBlocking: Boolean
    get() = when (this) {
        is JumpUriNode, is TextNode, is JumpPageNode -> true
        is UploadImageNode, is PixivImageNode, is NewPageNode, is TitleNode -> false
    }


/**
 * An ordered, read-only list of [CombinedTextNode] fragments that together form one piece of text.
 *
 * Instances are created through [asCombinedText] or [CombinedTextNode.asSingle].
 * Equality and hashing follow the wrapped list, so two instances with the same fragments are
 * equal (and data classes holding a [CombinedText] compare by content).
 */
class CombinedText internal constructor(
    private val nodes: List<CombinedTextNode>
) : List<CombinedTextNode> by nodes {
    /**
     * Renders the fragments back to back, with no separator between them.
     * Notated fragments are rendered as `text^{notation}`.
     */
    override fun toString(): String = joinToString(separator = "") { node ->
        when (node) {
            is NotatedText -> "${node.text}^{${node.notation}}"
            is PlainText -> node.text
        }
    }

    // Interface delegation does not forward equals/hashCode, so forward them explicitly.
    override fun equals(other: Any?): Boolean = nodes == other

    override fun hashCode(): Int = nodes.hashCode()
}

/** Wraps this list of fragments in a [CombinedText]. */
fun List<CombinedTextNode>.asCombinedText() = CombinedText(this)

/** A single fragment of a [CombinedText]. */
sealed interface CombinedTextNode {
    /** The fragment's text. */
    val text: String

    /** Returns a [CombinedText] that contains only this fragment. */
    fun asSingle() = CombinedText(listOf(this))
}

/** A fragment that is just [text]. */
data class PlainText(override val text: String) : CombinedTextNode

/** A fragment whose [text] carries an attached [notation]. */
data class NotatedText(override val text: String, val notation: String) : CombinedTextNode

/** Wraps this string in a [PlainText]. */
fun String.toPlainText() = PlainText(this)

/** Wraps this string in a [NotatedText] with the given [notation]. */
fun String.toNotatedText(notation: String) = NotatedText(this, notation)
124 changes: 124 additions & 0 deletions src/main/kotlin/top/kagg886/pixko/module/novel/parser/v2/parse.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
package top.kagg886.pixko.module.novel.parser.v2

import top.kagg886.pixko.anno.ExperimentalNovelParserAPI

// May be used in the future.
// private val JUMP_URI_REGEX = "\\[\\[jumpuri:(.*)>(.*)]]".toRegex()

/**
 * Matches a single `[[rb:A>B]]` tag; group 1 is `A`, group 2 is `B`.
 *
 * Both groups are reluctant: `A` ends at the first `>` and `B` ends at the first `]]`, so
 * several tags in one string are matched one by one instead of being merged into one match.
 */
private val NOTATION_REGEX = """\[\[rb:(.*?)>(.*?)]]""".toRegex()

/**
 * Matches one tag of the form `[name]`, `[name:value]` or the double-bracket variants.
 * Group 1 is the tag name, group 2 the value (empty when the tag has none).
 *
 * The value may contain complete nested `[[rb:...]]` tags (e.g. inside a chapter title) but no
 * other brackets. The pattern is written as "plain* (rb plain*)*" rather than an alternation
 * inside a repetition, which keeps matching linear and avoids deep regex recursion on long text.
 */
private val TAG_REGEX = run {
    val plain = """[^\[\]]"""
    val nestedRb = """\[\[rb:$plain+]]"""
    // (?!\]) rejects an empty value, matching the original "one or more characters" rule.
    """\[{1,2}(\w+)(?::((?!\])$plain*(?:$nestedRb$plain*)*))?]{1,2}""".toRegex()
}

/**
 * Builds the [NovelNode] for one tag.
 *
 * @param name the tag name, e.g. `jump`, `newpage`, `pixivimage`
 * @param rawValue the text after the tag's `:` (empty when the tag has no value)
 * @param position range of the whole tag, stored on the resulting node
 * @throws IllegalStateException for an unknown [name], a missing `>` separator, or a
 *   `jumpuri` target that does not start with `http`
 * @throws NumberFormatException when a numeric value cannot be parsed
 */
private fun tagToNode(
    name: String,
    rawValue: String,
    position: IntRange,
): NovelNode {
    // Splits rawValue at its first '>' only; anything after that stays in the second part.
    fun splitOnArrow(): List<String> {
        val parts = rawValue.split(">", limit = 2)
        check(parts.size == 2) { "Cannot find separator(>)" }
        return parts
    }

    return when (name) {
        "jump" -> JumpPageNode(rawValue.toInt(), position)

        "newpage" -> NewPageNode(position)

        "uploadedimage" -> UploadImageNode(rawValue, position)

        "pixivimage" -> {
            // "id" or "id-page"; the page in the tag is one-based, the stored index zero-based.
            val numbers = rawValue.split("-").map(String::toInt)
            val imageId = numbers.first()
            val zeroBasedPage = numbers.getOrElse(1) { 1 } - 1
            PixivImageNode(imageId, zeroBasedPage, position)
        }

        "jumpuri" -> {
            val (label, target) = splitOnArrow()
            check(target.startsWith("http")) {
                "Invalid url:$target"
            }
            JumpUriNode(label, target, position)
        }

        "rb" -> {
            // The part before '>' is stored as the notation, the part after it as the text.
            // NOTE(review): confirm this order matches the tag's intended "text>notation" layout.
            val (beforeArrow, afterArrow) = splitOnArrow()
            TextNode(afterArrow.toNotatedText(beforeArrow).asSingle(), position)
        }

        "chapter" -> {
            // Split the title into plain runs and [[rb:..]] runs, keeping their order.
            val pieces = mutableListOf<CombinedTextNode>()
            var cursor = 0
            for (match in NOTATION_REGEX.findAll(rawValue)) {
                val span = match.range
                if (span.first > cursor) {
                    pieces.add(rawValue.substring(cursor, span.first).toPlainText())
                }
                val (beforeArrow, afterArrow) = match.destructured
                pieces.add(afterArrow.toNotatedText(beforeArrow))
                cursor = span.last + 1
            }
            if (cursor < rawValue.length) {
                pieces.add(rawValue.substring(cursor).toPlainText())
            }
            TitleNode(pieces.asCombinedText(), position)
        }

        else -> error("Unknown tag name:$name")
    }
}

/**
 * Parses novel text into a list of [NovelNode]s.
 *
 * Every match of `TAG_REGEX` is turned into a node by `tagToNode`; the text between matches
 * (and after the last one) becomes plain [TextNode]s. A tag that `tagToNode` rejects is kept
 * verbatim as a plain [TextNode], so no input text is dropped.
 *
 * Each node's `position` is an inclusive index range into [str]; the ranges of consecutive
 * nodes are adjacent and never overlap.
 *
 * @param str the raw novel text
 * @return the parsed nodes in source order; empty when [str] is empty
 */
@ExperimentalNovelParserAPI
fun createNovelDataV2(str: String): List<NovelNode> {
    val nodes = mutableListOf<NovelNode>()
    var lastIndex = 0

    // Emits str[lastIndex, endExclusive) as a plain text node; does nothing for an empty span.
    fun flushPlainText(endExclusive: Int) {
        if (endExclusive > lastIndex) {
            val plainText = str.substring(lastIndex, endExclusive).toPlainText().asSingle()
            // `until` keeps the range inclusive of the last text character only, so it does not
            // spill into the following tag (or past the end of the string).
            nodes.add(TextNode(plainText, lastIndex until endExclusive))
        }
    }

    TAG_REGEX.findAll(str).forEach { result ->
        val position = result.range
        flushPlainText(position.first)

        val (name, rawValue) = result.destructured
        val node = try {
            tagToNode(name, rawValue, position)
        } catch (_: Exception) {
            // Unknown or malformed tag: deliberately fall back to keeping it as plain text.
            TextNode(result.value.toPlainText().asSingle(), position)
        }
        nodes.add(node)

        lastIndex = position.last + 1
    }

    flushPlainText(str.length)

    return nodes
}

/**
 * Serializes the nodes back into tag markup, concatenated in list order.
 *
 * Text and titles render their notated fragments as `[[rb:notation>text]]`; a
 * [PixivImageNode] only gets a `-page` suffix when its index is not 0.
 */
@ExperimentalNovelParserAPI
fun List<NovelNode>.toOriginalString(): String {
    // Renders the fragments of a CombinedText back to back, without a separator.
    fun CombinedText.render(): String = buildString {
        for (piece in this@render) {
            append(
                when (piece) {
                    is NotatedText -> "[[rb:${piece.notation}>${piece.text}]]"
                    is PlainText -> piece.text
                }
            )
        }
    }

    return joinToString(separator = "") { node ->
        when (node) {
            is TextNode -> node.text.render()
            is JumpUriNode -> "[[jumpuri:${node.text}>${node.uri}]]"
            is UploadImageNode -> "[uploadedimage:${node.url}]"
            is PixivImageNode -> {
                // The stored index is zero-based; the tag uses a one-based page number.
                val pageSuffix = if (node.index == 0) "" else "-${node.index + 1}"
                "[pixivimage:${node.id}$pageSuffix]"
            }
            is NewPageNode -> "[newpage]"
            is TitleNode -> "[chapter:${node.text.render()}]"
            is JumpPageNode -> "[jump:${node.page}]"
        }
    }
}
10 changes: 10 additions & 0 deletions src/main/kotlin/top/kagg886/pixko/module/novel/parser/v2/utils.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package top.kagg886.pixko.module.novel.parser.v2

import top.kagg886.pixko.anno.ExperimentalNovelParserAPI
import top.kagg886.pixko.module.novel.NovelData

/**
 * A [Lazy] that parses this novel's `text` with [createNovelDataV2] when its value is first read.
 *
 * NOTE: the getter builds a new [Lazy] on every access, so the parsed result is only cached
 * for as long as the caller keeps the returned [Lazy] instance.
 */
@ExperimentalNovelParserAPI
val NovelData.content: Lazy<List<NovelNode>>
    get() {
        val novel = this
        return lazy { createNovelDataV2(novel.text) }
    }
16 changes: 12 additions & 4 deletions src/main/kotlin/top/kagg886/pixko/module/search/illust.kt
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,17 @@ enum class SearchSort {
* # 搜索目标
* @property EXACT_MATCH_FOR_TAGS 精确匹配标签
* @property PARTIAL_MATCH_FOR_TAGS 模糊匹配标签
* @property TITLE_AND_CAPTION 标题和描述
* @property TITLE_AND_CAPTION 标题和描述 (仅限插画)
* @property TEXT 描述(仅限小说)
* @property KEYWORD 关键词(仅限小说)
*
*/
enum class SearchTarget {
EXACT_MATCH_FOR_TAGS,
PARTIAL_MATCH_FOR_TAGS,
TITLE_AND_CAPTION
TITLE_AND_CAPTION,
TEXT,
KEYWORD
}

/**
Expand All @@ -43,8 +48,8 @@ enum class SearchTarget {
* @property page 页码
*/
data class SearchConfig(
var sort: SearchSort = SearchSort.DATE_DESC,
var searchTarget: SearchTarget = SearchTarget.PARTIAL_MATCH_FOR_TAGS,
var sort: SearchSort = DATE_DESC,
var searchTarget: SearchTarget = PARTIAL_MATCH_FOR_TAGS,
var startDate: LocalDateTime? = null,
var endDate: LocalDateTime? = null,
var page: Int = 1
Expand All @@ -61,6 +66,9 @@ suspend fun PixivAccount.searchIllust(
block: SearchConfig.() -> Unit = {}
): List<Illust> {
val (sort, searchTarget, startDate, endDate, page) = SearchConfig().apply(block)
check(searchTarget in listOf(EXACT_MATCH_FOR_TAGS, PARTIAL_MATCH_FOR_TAGS, TITLE_AND_CAPTION)) {
"searchTarget must be EXACT_MATCH_FOR_TAGS, PARTIAL_MATCH_FOR_TAGS or TITLE_AND_CAPTION"
}
val userInfo = getCurrentUserSimpleProfile()
if (!userInfo.isPremium && sort == POPULAR_DESC) {
if (page != 1) {
Expand Down
11 changes: 10 additions & 1 deletion src/main/kotlin/top/kagg886/pixko/module/search/novel.kt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,16 @@ suspend fun PixivAccount.searchNovel(
block: SearchConfig.() -> Unit = {}
): List<Novel> {
val (sort, searchTarget, startDate, endDate, page) = SearchConfig().apply(block)

check(
searchTarget in listOf(
SearchTarget.EXACT_MATCH_FOR_TAGS,
SearchTarget.PARTIAL_MATCH_FOR_TAGS,
SearchTarget.TEXT,
SearchTarget.KEYWORD
)
) {
"searchTarget must be EXACT_MATCH_FOR_TAGS,PARTIAL_MATCH_FOR_TAGS,TEXT,KEYWORD"
}
val userInfo = getCurrentUserSimpleProfile()
if (!userInfo.isPremium && sort == POPULAR_DESC) {
if (page != 1) {
Expand Down
12 changes: 6 additions & 6 deletions src/test/kotlin/NovelTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ class NovelTest {
}

@Test
fun testDetail():Unit = runBlocking {
val novel = client.getNovelDetail( 22767441)
fun testDetail(): Unit = runBlocking {
val novel = client.getNovelDetail(22767441)
}

@Test
Expand All @@ -33,20 +33,20 @@ class NovelTest {
}

@Test
fun testNovelContent():Unit = runBlocking {
fun testNovelContent(): Unit = runBlocking {

//测试版本:22767441
val novel = client.getNovelContent(23351531)
println(novel.data.filterIsInstance<UploadImageNode>().toList().map { it.url })
println(novel.data.filterIsInstance<UploadImageNode>().map { it.url })
}

@Test
fun testNovelBookmark():Unit = runBlocking {
fun testNovelBookmark(): Unit = runBlocking {
client.bookmarkNovel(21844391)
}

@Test
fun testNovelBookmarkDelete():Unit = runBlocking {
fun testNovelBookmarkDelete(): Unit = runBlocking {
val novel = client.deleteBookmarkNovel(21844391)
println(novel)
}
Expand Down
Loading

0 comments on commit dcf7a3b

Please sign in to comment.