-
Notifications
You must be signed in to change notification settings - Fork 635
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Json configuration flag to allow comments
in C/Java style for both the string and stream parsers. Together with allowTrailingComma and isLenient, this flag helps cover most JSON5 use cases, such as configuration files. Fixes #2221. Fixes #797.
- Loading branch information
1 parent
1f7372a
commit fa75b84
Showing
13 changed files
with
509 additions
and
76 deletions.
There are no files selected for viewing
64 changes: 64 additions & 0 deletions
64
benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterFeedCommentsBenchmark.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
/* | ||
* Copyright 2017-2024 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license. | ||
*/ | ||
|
||
package kotlinx.benchmarks.json | ||
|
||
import kotlinx.benchmarks.model.* | ||
import kotlinx.serialization.json.* | ||
import org.openjdk.jmh.annotations.* | ||
import java.io.* | ||
import java.util.concurrent.* | ||
|
||
@Warmup(iterations = 7, time = 1) | ||
@Measurement(iterations = 7, time = 1) | ||
@BenchmarkMode(Mode.Throughput) | ||
@OutputTimeUnit(TimeUnit.SECONDS) | ||
@State(Scope.Benchmark) | ||
@Fork(2) | ||
open class TwitterFeedCommentsBenchmark { | ||
// Raw bytes of the benchmark fixture. getResource returns null when the resource is missing,
// so fail with a descriptive message instead of an opaque NullPointerException.
val inputBytes = requireNotNull(TwitterFeedBenchmark::class.java.getResource("/twitter_macro.json")) {
    "Benchmark resource /twitter_macro.json was not found on the classpath"
}.readBytes()
// The same payload as a String, for the string-based decoder.
private val input = inputBytes.decodeToString()
// The payload with "// json comment" appended to selected lines.
private val inputWithComments = prepareInputWithComments(input)
// The commented payload as bytes, for the stream-based decoder.
private val inputWithCommentsBytes = inputWithComments.encodeToByteArray()

// Two configurations that differ only in allowComments.
private val jsonComments = Json {
    ignoreUnknownKeys = true
    allowComments = true
}
private val jsonNoComments = Json {
    ignoreUnknownKeys = true
    allowComments = false
}
|
||
/**
 * Returns a copy of [inp] in which every line whose first non-whitespace characters are `"i`
 * (keys such as "id", "in_...", "is_...") gets ` // json comment` appended.
 *
 * Lines are re-joined with `\n`.
 *
 * @throws IllegalStateException if no line matched, i.e. the result contains no comment at all.
 */
fun prepareInputWithComments(inp: String): String {
    val result = inp.lineSequence().joinToString("\n") { line ->
        if (line.trimStart().startsWith("\"i")) "$line // json comment" else line
    }
    // `assert` is a no-op unless the JVM runs with -ea, so use `check` to guarantee that the
    // "comments in data" benchmarks never silently run on comment-free input.
    check(result.contains("// json comment")) { "No input line starts with \"i, so no comment was added" }
    return result
}
|
||
// Decodes a trivial string with both configurations before benchmarking starts,
// so we know that both parser implementation classes are loaded.
@Setup
fun init() {
    val sample = "\"foobar\""
    require(jsonComments.decodeFromString<String>(sample) == "foobar")
    require(jsonNoComments.decodeFromString<String>(sample) == "foobar")
}
|
||
// Baseline with comment support disabled. Comparing it with TwitterFeedBenchmark.decodeMicroTwitter shows whether
// decoding slows down when both StringJsonLexer and CommentsJsonLexer are loaded by the JVM. The difference should be
// almost non-existent on modern JVMs (there is one on OpenJDK-Corretto-11.0.14.1, while 17 is fine).
@Benchmark
fun decodeMicroTwitter(): MicroTwitterFeed {
    return jsonNoComments.decodeFromString(MicroTwitterFeed.serializer(), input)
}
|
||
// Comment support enabled, but the input contains no comments. Comparing it with this.decodeMicroTwitter shows
// the cost of the feature itself: around 13% slower than without comments support, mainly because skipWhitespaces
// is a separate function that sometimes is not inlined by JIT.
@Benchmark
fun decodeMicroTwitterCommentSupport(): MicroTwitterFeed {
    return jsonComments.decodeFromString(MicroTwitterFeed.serializer(), input)
}
|
||
// Comment support enabled and comments present in the input.
// Shows how much the actual skipping of the comments takes: around 10%.
@Benchmark
fun decodeMicroTwitterCommentInData(): MicroTwitterFeed {
    return jsonComments.decodeFromString(MicroTwitterFeed.serializer(), inputWithComments)
}
|
||
// Stream decoding with comment support enabled; the input contains no comments.
@Benchmark
fun decodeMicroTwitterCommentSupportStream(): MicroTwitterFeed {
    val stream = ByteArrayInputStream(inputBytes)
    return jsonComments.decodeFromStream(MicroTwitterFeed.serializer(), stream)
}
|
||
// Stream decoding with comment support enabled; the input contains comments.
@Benchmark
fun decodeMicroTwitterCommentInDataStream(): MicroTwitterFeed {
    val stream = ByteArrayInputStream(inputWithCommentsBytes)
    return jsonComments.decodeFromStream(MicroTwitterFeed.serializer(), stream)
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
126 changes: 126 additions & 0 deletions
126
formats/json-tests/commonTest/src/kotlinx/serialization/features/JsonCommentsTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
/* | ||
* Copyright 2017-2024 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license. | ||
*/ | ||
|
||
package kotlinx.serialization.features | ||
|
||
import kotlinx.serialization.* | ||
import kotlinx.serialization.json.* | ||
import kotlin.test.* | ||
|
||
class JsonCommentsTest: JsonTestBase() { | ||
// Configuration under test: comments and trailing commas are both allowed.
val json = Json(default) {
    allowTrailingComma = true
    allowComments = true
}

// Derived from json, with lenient parsing and unknown keys ignored in addition.
val withLenient = Json(json) {
    ignoreUnknownKeys = true
    isLenient = true
}
|
||
// A single block comment or line comment after a value is skipped.
@Test
fun testBasic() = parametrizedTest { mode ->
    val expected = StringData("b")
    val withBlockComment = """{"data": "b" /*value b*/ }"""
    val withLineComment = "{\"data\": \"b\" // value b \n }"
    assertEquals(expected, json.decodeFromString(withBlockComment, mode))
    assertEquals(expected, json.decodeFromString(withLineComment, mode))
}
|
||
// Decoding target with a string, a list of ints, a nested object and a second string.
@Serializable
data class Target(
    val key: String,
    val key2: List<Int>,
    val key3: NestedTarget,
    val key4: String
)
|
||
// Nested object held in Target.key3.
@Serializable
data class NestedTarget(
    val nestedKey: String
)
|
||
// Builds the expected Target used by the tests; only key4 varies between them.
private fun target(key4: String): Target {
    val nested = NestedTarget(nestedKey = "foo")
    return Target(key = "value", key2 = listOf(1, 2), key3 = nested, key4 = key4)
}
|
||
// Block comments are placed at every position between tokens; a comment-like sequence inside a quoted
// string must remain part of the value.
@Test
fun testAllBlocks() = parametrizedTest { mode ->
    val expected = target("/*comment inside quotes is a part of value*/")
    val source = """{ /*beginning*/
/*before key*/ "key" /*after key*/ : /*after colon*/ "value" /*before comma*/,
"key2": [ /*array1*/ 1, /*array2*/ 2, /*end array*/],
"key3": { /*nested obj*/ "nestedKey": "foo"} /*after nested*/,
"key4": "/*comment inside quotes is a part of value*/",
/*before end*/
}"""
    assertEquals(expected, json.decodeFromString(source, mode))
}
|
||
// Line comments are placed at every position between tokens; a comment-like sequence inside a quoted
// string must remain part of the value.
@Test
fun testAllLines() = parametrizedTest { mode ->
    val expected = target("//comment inside quotes is a part of value")
    val source = """{ //beginning
//before key
"key" // after key
: // after colon
"value" //before comma
,
"key2": [ //array1
1, //array2
2, //end array
],
"key3": { //nested obj
"nestedKey": "foo"
} , //after nested
"key4": "//comment inside quotes is a part of value",
//before end
}"""
    assertEquals(expected, json.decodeFromString(source, mode))
}
|
||
// Line and block comments are mixed in one document: a marker of one comment kind inside the other kind
// is plain comment text, and block comments do not nest.
@Test
fun testMixed() = parametrizedTest { mode ->
    val expected = target("*/")
    val source = """{ // begin
"key": "value", // after
"key2": /* array */ [1, 2],
"key3": /* //this is a block comment */ { "nestedKey": // /*this is a line comment*/ "bar"
"foo" },
"key4": /* nesting block comments /* not supported */ "*/"
/* end */}"""
    assertEquals(expected, json.decodeFromString(source, mode))
}
|
||
// Same layout as the line-comment test, but decoded with the lenient configuration and unquoted keys and values.
// The key4 fixture expects "//" directly after an unquoted value to stay part of that value.
@Test
fun testWithLenient() = parametrizedTest { mode ->
    val expected = target("value//comment_cannot_break_value_apart")
    val source = """{ //beginning
//before key
key // after key
: // after colon
value //before comma
,
key2: [ //array1
1, //array2
2, //end array
],
key3: { //nested obj
nestedKey: "foo"
} , //after nested
key4: value//comment_cannot_break_value_apart,
key5: //comment without quotes where new token expected is still a comment
value5,
//before end
}"""
    assertEquals(expected, withLenient.decodeFromString(source, mode))
}
|
||
// A line comment may be terminated by the end of input, but an unclosed block comment must fail
// with a message that names the missing terminator.
@Test
fun testUnclosedCommentsErrorMsg() = parametrizedTest { mode ->
    val input = """{"data": "x"} // no newline"""
    assertEquals(StringData("x"), json.decodeFromString<StringData>(input, mode))

    val input2 = """{"data": "x"} /* no endblock"""
    // The String argument of assertFailsWith is only the assertion-failure message, not an expectation
    // about the exception text, so the message has to be checked on the returned exception.
    val exception = assertFailsWith<SerializationException> {
        json.decodeFromString<StringData>(input2, mode)
    }
    val actualMessage = exception.message.orEmpty()
    assertTrue(
        "Expected end of the block comment: \"*/\", but had EOF instead" in actualMessage,
        "Unexpected error message: $actualMessage"
    )
}
|
||
// Comments longer than two reader batches must still be skipped completely.
@Test
fun testVeryLargeComments() = parametrizedTest { mode ->
    // 16 * 1024 is ReaderJsonLexer.BATCH_SIZE
    val fillerLength = 16 * 1024 * 2 + 42
    val filler = "a".repeat(fillerLength)
    val expected = StringData("x")

    val lineCommented = "{\"data\": //a$filler\n\"x\"}"
    assertEquals(expected, json.decodeFromString<StringData>(lineCommented, mode))

    val blockCommented = "{\"data\": /*a$filler*/\"x\"}"
    assertEquals(expected, json.decodeFromString<StringData>(blockCommented, mode))
}
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.