Skip to content

Commit

Permalink
Add Json configuration flag to allow comments
Browse files Browse the repository at this point in the history
in C/Java style for both string and stream parser.

This flag together with allowTrailingCommas and isLenient will help
to cover most use-cases for Json5, for example, configuration files.

Fixes #2221
Fixes #797
  • Loading branch information
sandwwraith committed Mar 5, 2024
1 parent 1f7372a commit fa75b84
Show file tree
Hide file tree
Showing 13 changed files with 509 additions and 76 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright 2017-2024 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
*/

package kotlinx.benchmarks.json

import kotlinx.benchmarks.model.*
import kotlinx.serialization.json.*
import org.openjdk.jmh.annotations.*
import java.io.*
import java.util.concurrent.*

@Warmup(iterations = 7, time = 1)
@Measurement(iterations = 7, time = 1)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.SECONDS)
@State(Scope.Benchmark)
@Fork(2)
open class TwitterFeedCommentsBenchmark {
val inputBytes = TwitterFeedBenchmark::class.java.getResource("/twitter_macro.json").readBytes()
private val input = inputBytes.decodeToString()
private val inputWithComments = prepareInputWithComments(input)
private val inputWithCommentsBytes = inputWithComments.encodeToByteArray()

private val jsonComments = Json { ignoreUnknownKeys = true; allowComments = true; }
private val jsonNoComments = Json { ignoreUnknownKeys = true; allowComments = false; }

fun prepareInputWithComments(inp: String): String {
val result = inp.lineSequence().map { s ->
// "id", "in_...", "is_...", etc
if (!s.trimStart().startsWith("\"i")) s else "$s // json comment"
}.joinToString("\n")
assert(result.contains("// json comment"))
return result
}

@Setup
fun init() {
// Explicitly invoking both variants before benchmarking so we know that both parser implementation classes are loaded
require("foobar" == jsonComments.decodeFromString<String>("\"foobar\""))
require("foobar" == jsonNoComments.decodeFromString<String>("\"foobar\""))
}

// The difference with TwitterFeedBenchmark.decodeMicroTwitter shows if we slow down when both StringJsonLexer and CommentsJsonLexer
// are loaded by JVM. Should be almost non-existent on modern JVMs (but on OpenJDK-Corretto-11.0.14.1 there is one. 17 is fine.)
@Benchmark
fun decodeMicroTwitter() = jsonNoComments.decodeFromString(MicroTwitterFeed.serializer(), input)

// The difference with this.decodeMicroTwitter shows if we slow down when comments are enabled but no comments present
// in the input. It is around 13% slower than without comments support, mainly because skipWhitespaces is a separate function
// that sometimes is not inlined by JIT.
@Benchmark
fun decodeMicroTwitterCommentSupport() = jsonComments.decodeFromString(MicroTwitterFeed.serializer(), input)

// Shows how much actual skipping of the comments takes: around 10%.
@Benchmark
fun decodeMicroTwitterCommentInData() = jsonComments.decodeFromString(MicroTwitterFeed.serializer(), inputWithComments)

@Benchmark
fun decodeMicroTwitterCommentSupportStream() = jsonComments.decodeFromStream(MicroTwitterFeed.serializer(), ByteArrayInputStream(inputBytes))

@Benchmark
fun decodeMicroTwitterCommentInDataStream() = jsonComments.decodeFromStream(MicroTwitterFeed.serializer(), ByteArrayInputStream(inputWithCommentsBytes))
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@ open class TwitterFeedStreamBenchmark {
jacksonObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)


@Setup
fun init() {
// Explicitly invoking decodeFromStream before benchmarking so we know that both parser implementation classes are loaded
require("foobar" == Json.decodeFromStream<String>(ByteArrayInputStream("\"foobar\"".encodeToByteArray())))
require("foobar" == Json.decodeFromString<String>("\"foobar\""))
}


private val inputStream: InputStream
get() = ByteArrayInputStream(bytes)
private val outputStream: OutputStream
Expand Down Expand Up @@ -59,6 +67,7 @@ open class TwitterFeedStreamBenchmark {
}
}

// Difference with TwitterFeedBenchmark.decodeMicroTwitter shows how heavy Java's standard UTF-8 decoding actually is.
@Benchmark
fun decodeMicroTwitterReadText(): MicroTwitterFeed {
return inputStream.use {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/*
* Copyright 2017-2024 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
*/

package kotlinx.serialization.features

import kotlinx.serialization.*
import kotlinx.serialization.json.*
import kotlin.test.*

class JsonCommentsTest: JsonTestBase() {
val json = Json(default) {
allowComments = true
allowTrailingComma = true
}

val withLenient = Json(json) {
isLenient = true
ignoreUnknownKeys = true
}

@Test
fun testBasic() = parametrizedTest { mode ->
val inputBlock = """{"data": "b" /*value b*/ }"""
val inputLine = "{\"data\": \"b\" // value b \n }"
assertEquals(StringData("b"), json.decodeFromString(inputBlock, mode))
assertEquals(StringData("b"), json.decodeFromString(inputLine, mode))
}

@Serializable
data class Target(val key: String, val key2: List<Int>, val key3: NestedTarget, val key4: String)

@Serializable
data class NestedTarget(val nestedKey: String)

private fun target(key4: String): Target = Target("value", listOf(1, 2), NestedTarget("foo"), key4)

@Test
fun testAllBlocks() = parametrizedTest { mode ->
val input = """{ /*beginning*/
/*before key*/ "key" /*after key*/ : /*after colon*/ "value" /*before comma*/,
"key2": [ /*array1*/ 1, /*array2*/ 2, /*end array*/],
"key3": { /*nested obj*/ "nestedKey": "foo"} /*after nested*/,
"key4": "/*comment inside quotes is a part of value*/",
/*before end*/
}"""
assertEquals(target("/*comment inside quotes is a part of value*/"), json.decodeFromString(input, mode))
}

@Test
fun testAllLines() = parametrizedTest { mode ->
val input = """{ //beginning
//before key
"key" // after key
: // after colon
"value" //before comma
,
"key2": [ //array1
1, //array2
2, //end array
],
"key3": { //nested obj
"nestedKey": "foo"
} , //after nested
"key4": "//comment inside quotes is a part of value",
//before end
}"""
assertEquals(target("//comment inside quotes is a part of value"), json.decodeFromString(input, mode))
}

@Test
fun testMixed() = parametrizedTest { mode ->
val input = """{ // begin
"key": "value", // after
"key2": /* array */ [1, 2],
"key3": /* //this is a block comment */ { "nestedKey": // /*this is a line comment*/ "bar"
"foo" },
"key4": /* nesting block comments /* not supported */ "*/"
/* end */}"""
assertEquals(target("*/"), json.decodeFromString(input, mode))
}

@Test
fun testWithLenient() = parametrizedTest { mode ->
val input = """{ //beginning
//before key
key // after key
: // after colon
value //before comma
,
key2: [ //array1
1, //array2
2, //end array
],
key3: { //nested obj
nestedKey: "foo"
} , //after nested
key4: value//comment_cannot_break_value_apart,
key5: //comment without quotes where new token expected is still a comment
value5,
//before end
}"""
assertEquals(target("value//comment_cannot_break_value_apart"), withLenient.decodeFromString(input, mode))
}

@Test
fun testUnclosedCommentsErrorMsg() = parametrizedTest { mode ->
val input = """{"data": "x"} // no newline"""
assertEquals(StringData("x"), json.decodeFromString<StringData>(input, mode))
val input2 = """{"data": "x"} /* no endblock"""
assertFailsWith<SerializationException>("Expected end of the block comment: \"*/\", but had EOF instead at path: \$") {
json.decodeFromString<StringData>(input2, mode)
}
}

@Test
fun testVeryLargeComments() = parametrizedTest { mode ->
// 16 * 1024 is ReaderJsonLexer.BATCH_SIZE
val strLen = 16 * 1024 * 2 + 42
val inputLine = """{"data": //a""" + "a".repeat(strLen) + "\n\"x\"}"
assertEquals(StringData("x"), json.decodeFromString<StringData>(inputLine, mode))
val inputBlock = """{"data": /*a""" + "a".repeat(strLen) + "*/\"x\"}"
assertEquals(StringData("x"), json.decodeFromString<StringData>(inputBlock, mode))
}

}
3 changes: 3 additions & 0 deletions formats/json/api/kotlinx-serialization-json.api
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ public final class kotlinx/serialization/json/JsonBuilder {
public final fun getPrettyPrint ()Z
public final fun getPrettyPrintIndent ()Ljava/lang/String;
public final fun getSerializersModule ()Lkotlinx/serialization/modules/SerializersModule;
public final fun getSupportComments ()Z
public final fun getUseAlternativeNames ()Z
public final fun getUseArrayPolymorphism ()Z
public final fun isLenient ()Z
Expand All @@ -126,6 +127,7 @@ public final class kotlinx/serialization/json/JsonBuilder {
public final fun setPrettyPrint (Z)V
public final fun setPrettyPrintIndent (Ljava/lang/String;)V
public final fun setSerializersModule (Lkotlinx/serialization/modules/SerializersModule;)V
public final fun setSupportComments (Z)V
public final fun setUseAlternativeNames (Z)V
public final fun setUseArrayPolymorphism (Z)V
}
Expand Down Expand Up @@ -154,6 +156,7 @@ public final class kotlinx/serialization/json/JsonConfiguration {
public final fun getNamingStrategy ()Lkotlinx/serialization/json/JsonNamingStrategy;
public final fun getPrettyPrint ()Z
public final fun getPrettyPrintIndent ()Ljava/lang/String;
public final fun getSupportComments ()Z
public final fun getUseAlternativeNames ()Z
public final fun getUseArrayPolymorphism ()Z
public final fun isLenient ()Z
Expand Down
21 changes: 19 additions & 2 deletions formats/json/commonMain/src/kotlinx/serialization/json/Json.kt
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,13 @@ public sealed class Json(
* @throws [IllegalArgumentException] if the decoded input cannot be represented as a valid instance of type [T]
*/
public final override fun <T> decodeFromString(deserializer: DeserializationStrategy<T>, @FormatLanguage("json", "", "") string: String): T {
val lexer = StringJsonLexer(string)
val lexer = StringJsonLexer(this, string)
val input = StreamingJsonDecoder(this, WriteMode.OBJ, lexer, deserializer.descriptor, null)
val result = input.decodeSerializableValue(deserializer)
lexer.expectEof()
return result
}

/**
* Serializes the given [value] into an equivalent [JsonElement] using the given [serializer]
*
Expand Down Expand Up @@ -385,6 +386,22 @@ public class JsonBuilder internal constructor(json: Json) {
@ExperimentalSerializationApi
public var allowTrailingComma: Boolean = json.configuration.allowTrailingComma

/**
* Allows parser to accept C/Java-style comments in JSON input.
*
* Comments are being skipped and are not stored anywhere; this setting does not affect encoding in any way.
*
* More specifically, a comment is a substring that is not a part of JSON key or value, confirming to one of those:
*
* 1. Starts with `//` characters and ends with a newline character `\n`.
* 2. Starts with `/*` characters and ends with `*/` characters. Nesting block comments
* is not supported: no matter how many `/*` characters you have, first `*/` will end the comment.
*
* `false` by default.
*/
@ExperimentalSerializationApi
public var allowComments: Boolean = json.configuration.allowComments

/**
* Module with contextual and polymorphic serializers to be used in the resulting [Json] instance.
*
Expand Down Expand Up @@ -422,7 +439,7 @@ public class JsonBuilder internal constructor(json: Json) {
allowStructuredMapKeys, prettyPrint, explicitNulls, prettyPrintIndent,
coerceInputValues, useArrayPolymorphism,
classDiscriminator, allowSpecialFloatingPointValues, useAlternativeNames,
namingStrategy, decodeEnumsCaseInsensitive, allowTrailingComma, classDiscriminatorMode
namingStrategy, decodeEnumsCaseInsensitive, allowTrailingComma, allowComments, classDiscriminatorMode
)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ public class JsonConfiguration @OptIn(ExperimentalSerializationApi::class) inter
@ExperimentalSerializationApi
public val allowTrailingComma: Boolean = false,
@ExperimentalSerializationApi
public val allowComments: Boolean = false,
@ExperimentalSerializationApi
public var classDiscriminatorMode: ClassDiscriminatorMode = ClassDiscriminatorMode.POLYMORPHIC,
) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public fun <T> decodeByReader(
deserializer: DeserializationStrategy<T>,
reader: InternalJsonReader
): T {
val lexer = ReaderJsonLexer(reader)
val lexer = ReaderJsonLexer(json, reader)
try {
val input = StreamingJsonDecoder(json, WriteMode.OBJ, lexer, deserializer.descriptor, null)
val result = input.decodeSerializableValue(deserializer)
Expand All @@ -56,7 +56,7 @@ public fun <T> decodeToSequenceByReader(
deserializer: DeserializationStrategy<T>,
format: DecodeSequenceMode = DecodeSequenceMode.AUTO_DETECT
): Sequence<T> {
val lexer = ReaderJsonLexer(reader, CharArray(BATCH_SIZE)) // Unpooled buffer due to lazy nature of sequence
val lexer = ReaderJsonLexer(json, reader, CharArray(BATCH_SIZE)) // Unpooled buffer due to lazy nature of sequence
val iter = JsonIterator(format, json, lexer, deserializer)
return Sequence { iter }.constrainOnce()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ public fun <T> decodeStringToJsonTree(
deserializer: DeserializationStrategy<T>,
source: String
): JsonElement {
val lexer = StringJsonLexer(source)
val lexer = StringJsonLexer(json, source)
val input = StreamingJsonDecoder(json, WriteMode.OBJ, lexer, deserializer.descriptor, null)
val tree = input.decodeJsonElement()
lexer.expectEof()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,12 @@ private sealed class AbstractJsonTreeDecoder(
return this as? JsonLiteral ?: throw JsonDecodingException(-1, "Unexpected 'null' literal when non-nullable $type was expected")
}

override fun decodeTaggedInline(tag: String, inlineDescriptor: SerialDescriptor): Decoder =
if (inlineDescriptor.isUnsignedNumber) JsonDecoderForUnsignedTypes(StringJsonLexer(getPrimitiveValue(tag).content), json)
else super.decodeTaggedInline(tag, inlineDescriptor)
override fun decodeTaggedInline(tag: String, inlineDescriptor: SerialDescriptor): Decoder {
return if (inlineDescriptor.isUnsignedNumber) {
val lexer = StringJsonLexer(json, getPrimitiveValue(tag).content)
JsonDecoderForUnsignedTypes(lexer, json)
} else super.decodeTaggedInline(tag, inlineDescriptor)
}

override fun decodeInline(descriptor: SerialDescriptor): Decoder {
return if (currentTagOrNull != null) super.decodeInline(descriptor)
Expand Down
Loading

0 comments on commit fa75b84

Please sign in to comment.