diff --git a/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt b/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt index 94f7052cd..ae53dd40b 100644 --- a/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt +++ b/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt @@ -6,7 +6,6 @@ package kotlinx.serialization.json import kotlinx.serialization.* import kotlinx.serialization.builtins.* -import kotlinx.serialization.json.internal.* import kotlinx.serialization.test.* import kotlin.test.* @@ -79,11 +78,11 @@ class JsonParserTest : JsonTestBase() { fun testTrailingComma() { testTrailingComma("{\"id\":0,}") testTrailingComma("{\"id\":0 ,}") - testTrailingComma("{\"id\":0 , ,}") + testTrailingComma("{\"id\":0 , ,}", message = "Multiple consecutive commas are not allowed in JSON") } - private fun testTrailingComma(content: String) { - assertFailsWithSerialMessage("JsonDecodingException", "Trailing comma before the end of JSON object") { Json.parseToJsonElement(content) } + private fun testTrailingComma(content: String, message: String = "Trailing comma before the end of JSON object") { + assertFailsWithSerialMessage("JsonDecodingException", message) { Json.parseToJsonElement(content) } } @Test diff --git a/formats/json-tests/commonTest/src/kotlinx/serialization/json/MissingCommaTest.kt b/formats/json-tests/commonTest/src/kotlinx/serialization/json/MissingCommaTest.kt new file mode 100644 index 000000000..a3a894dee --- /dev/null +++ b/formats/json-tests/commonTest/src/kotlinx/serialization/json/MissingCommaTest.kt @@ -0,0 +1,108 @@ +/* + * Copyright 2017-2024 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license. + */ + +package kotlinx.serialization.json + + +import kotlinx.serialization.* +import kotlinx.serialization.test.* +import kotlin.test.* + +class MissingCommaTest : JsonTestBase() { + @Serializable + class Holder( + val i: Int, + val c: Child, + ) + + @Serializable + class Child( + val i: String + ) + + private val withTrailingComma = Json { allowTrailingComma = true } + + @Test + fun missingCommaBetweenFieldsAfterPrimitive() { + val message = + "Unexpected JSON token at offset 8: Expected end of the object or comma at path: \$" + val json = """{"i":42 "c":{"i":"string"}}""" + + assertFailsWithSerialMessage("JsonDecodingException", message) { + default.decodeFromString(json) + } + } + + @Test + fun missingCommaBetweenFieldsAfterObject() { + val message = + "Unexpected JSON token at offset 19: Expected end of the object or comma at path: \$" + val json = """{"c":{"i":"string"}"i":42}""" + + assertFailsWithSerialMessage("JsonDecodingException", message) { + default.decodeFromString(json) + } + } + + @Test + fun allowTrailingCommaDoesNotApplyToCommaBetweenFields() { + val message = + "Unexpected JSON token at offset 8: Expected end of the object or comma at path: \$" + val json = """{"i":42 "c":{"i":"string"}}""" + + assertFailsWithSerialMessage("JsonDecodingException", message) { + withTrailingComma.decodeFromString(json) + } + } + + @Test + fun lenientSerializeDoesNotAllowToSkipCommaBetweenFields() { + val message = + "Unexpected JSON token at offset 8: Expected end of the object or comma at path: \$" + val json = """{"i":42 "c":{"i":"string"}}""" + + assertFailsWithSerialMessage("JsonDecodingException", message) { + lenient.decodeFromString(json) + } + } + + @Test + fun missingCommaInDynamicMap() { + val m = "Unexpected JSON token at offset 8: Expected end of the object or comma at path: \$" + val json = """{"i":42 "c":{"i":"string"}}""" + assertFailsWithSerialMessage("JsonDecodingException", m) { + default.parseToJsonElement(json) + } + } + + @Test + fun missingCommaInArray() { + val m = "Unexpected JSON token at offset 3: Expected end of the array or comma at path: \$" + val json = """[1 2 3 4]""" + + assertFailsWithSerialMessage("JsonDecodingException", m) { + default.decodeFromString>(json) + } + } + + @Test + fun missingCommaInStringMap() { + val m = "Unexpected JSON token at offset 9: Expected end of the object or comma at path: \$" + val json = """{"a":"1" "b":"2"}""" + + assertFailsWithSerialMessage("JsonDecodingException", m) { + default.decodeFromString>(json) + } + } + + @Test + fun missingCommaInUnknownKeys() { + val m = "Unexpected JSON token at offset 17: Expected end of the object or comma at path: \$" + val json = """{"i":"1","b":"2" "c":"2"}""" + + assertFailsWithSerialMessage("JsonDecodingException", m) { + lenient.decodeFromString(json) + } + } +} \ No newline at end of file diff --git a/formats/json-tests/commonTest/src/kotlinx/serialization/json/TrailingCommaTest.kt b/formats/json-tests/commonTest/src/kotlinx/serialization/json/TrailingCommaTest.kt index 0916de579..d729349d2 100644 --- a/formats/json-tests/commonTest/src/kotlinx/serialization/json/TrailingCommaTest.kt +++ b/formats/json-tests/commonTest/src/kotlinx/serialization/json/TrailingCommaTest.kt @@ -125,4 +125,45 @@ class TrailingCommaTest : JsonTestBase() { "wl":{"l":[1, 2, 3,],},}""" assertEquals(Mixed(multipleFields, withMap, withList), tj.decodeFromString(input, mode)) } + + @Test + fun testCommaReportedPosition() = parametrizedTest { mode -> + val sd = """{"a":"1", }""" + + checkSerializationException({ + default.decodeFromString>(sd, mode) + }, { message -> + assertContains( + message, + """Unexpected JSON token at offset 8: Trailing comma before the end of JSON object""" + ) + }) + } + + @Test + fun testMultipleTrailingCommasAreNotAllowedEvenWhenTrailingIsEnabled() = parametrizedTest { mode -> + val sd = """{"a":"1",,}""" + checkSerializationException({ + tj.decodeFromString>(sd, mode) + }, { message -> + assertContains( + message, + """Unexpected JSON token at offset 9: Multiple consecutive commas are not allowed in JSON""" + ) + }) + } + + @Test + fun testMultipleTrailingCommasError() = parametrizedTest { mode -> + val sd = """{"a":"1",,,}""" + + checkSerializationException({ + default.decodeFromString>(sd, mode) + }, { message -> + assertContains( + message, + """Unexpected JSON token at offset 9: Multiple consecutive commas are not allowed in JSON""" + ) + }) + } } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt index c885c808f..0f1c22915 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt @@ -46,13 +46,6 @@ internal fun AbstractJsonLexer.throwInvalidFloatingPointDecoded(result: Number): hint = specialFlowingValuesHint) } -internal fun AbstractJsonLexer.invalidTrailingComma(entity: String = "object"): Nothing { - fail("Trailing comma before the end of JSON $entity", - position = currentPosition - 1, - hint = "Trailing commas are non-complaint JSON and not allowed by default. Use 'allowTrailingComma = true' in 'Json {}' builder to support them." - ) -} - @OptIn(ExperimentalSerializationApi::class) internal fun InvalidKeyKindException(keyDescriptor: SerialDescriptor) = JsonEncodingException( "Value of type '${keyDescriptor.serialName}' can't be used in JSON as a key in the map. " + diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt index 9cb9bb3c5..bcad4cdbb 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt @@ -4,7 +4,7 @@ package kotlinx.serialization.json.internal -import kotlinx.serialization.ExperimentalSerializationApi +import kotlinx.serialization.* import kotlinx.serialization.json.* @OptIn(ExperimentalSerializationApi::class) @@ -24,53 +24,52 @@ internal class JsonTreeReader( readObjectImpl { callRecursive(Unit) } private inline fun readObjectImpl(reader: () -> JsonElement): JsonObject { - var lastToken = lexer.consumeNextToken(TC_BEGIN_OBJ) + lexer.consumeNextToken(TC_BEGIN_OBJ) + lexer.path.pushDescriptor(JsonObjectSerializer.descriptor) + if (lexer.peekNextToken() == TC_COMMA) lexer.fail("Unexpected leading comma") val result = linkedMapOf() while (lexer.canConsumeValue()) { + lexer.path.resetCurrentMapKey() // Read key and value val key = if (isLenient) lexer.consumeStringLenient() else lexer.consumeString() lexer.consumeNextToken(TC_COLON) + + lexer.path.updateCurrentMapKey(key) val element = reader() result[key] = element // Verify the next token - lastToken = lexer.consumeNextToken() - when (lastToken) { - TC_COMMA -> Unit // no-op, can continue with `canConsumeValue` that verifies the token after comma - TC_END_OBJ -> break // `canConsumeValue` can return incorrect result, since it checks token _after_ TC_END_OBJ - else -> lexer.fail("Expected end of the object or comma") - } + lexer.consumeCommaOrPeekEnd( + allowTrailing = trailingCommaAllowed, + expectedEnd = '}', + ) } - // Check for the correct ending - if (lastToken == TC_BEGIN_OBJ) { // Case of empty object - lexer.consumeNextToken(TC_END_OBJ) - } else if (lastToken == TC_COMMA) { // Trailing comma - if (!trailingCommaAllowed) lexer.invalidTrailingComma() - lexer.consumeNextToken(TC_END_OBJ) - } // else unexpected token? + + lexer.consumeNextToken(TC_END_OBJ) + lexer.path.popDescriptor() + return JsonObject(result) } private fun readArray(): JsonElement { - var lastToken = lexer.consumeNextToken() + lexer.consumeNextToken(TC_BEGIN_LIST) + lexer.path.pushDescriptor(JsonArraySerializer.descriptor) // Prohibit leading comma if (lexer.peekNextToken() == TC_COMMA) lexer.fail("Unexpected leading comma") val result = arrayListOf() while (lexer.canConsumeValue()) { + lexer.path.updateDescriptorIndex(result.size) val element = read() result.add(element) - lastToken = lexer.consumeNextToken() - if (lastToken != TC_COMMA) { - lexer.require(lastToken == TC_END_LIST) { "Expected end of the array or comma" } - } - } - // Check for the correct ending - if (lastToken == TC_BEGIN_LIST) { // Case of empty object - lexer.consumeNextToken(TC_END_LIST) - } else if (lastToken == TC_COMMA) { // Trailing comma - if (!trailingCommaAllowed) lexer.invalidTrailingComma("array") - lexer.consumeNextToken(TC_END_LIST) + lexer.consumeCommaOrPeekEnd( + allowTrailing = trailingCommaAllowed, + expectedEnd = ']', + entity = "array" + ) } + lexer.consumeNextToken(TC_END_LIST) + lexer.path.popDescriptor() + return JsonArray(result) } @@ -103,6 +102,7 @@ internal class JsonTreeReader( --stackDepth result } + TC_BEGIN_LIST -> readArray() else -> lexer.fail("Cannot read Json element because of unexpected ${tokenDescription(token)}") } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt index 9477db629..807e2c397 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt @@ -45,7 +45,8 @@ internal open class StreamingJsonDecoder( private var discriminatorHolder: DiscriminatorHolder? = discriminatorHolder private val configuration = json.configuration - private val elementMarker: JsonElementMarker? = if (configuration.explicitNulls) null else JsonElementMarker(descriptor) + private val elementMarker: JsonElementMarker? = + if (configuration.explicitNulls) null else JsonElementMarker(descriptor) override fun decodeJsonElement(): JsonElement = JsonTreeReader(json.configuration, lexer).read() @@ -76,14 +77,14 @@ internal open class StreamingJsonDecoder( @Suppress("UNCHECKED_CAST") val actualSerializer = try { - deserializer.findPolymorphicSerializer(this, type) - } catch (it: SerializationException) { // Wrap SerializationException into JsonDecodingException to preserve position, path, and input. - // Split multiline message from private core function: - // core/commonMain/src/kotlinx/serialization/internal/AbstractPolymorphicSerializer.kt:102 - val message = it.message!!.substringBefore('\n').removeSuffix(".") - val hint = it.message!!.substringAfter('\n', missingDelimiterValue = "") - lexer.fail(message, hint = hint) - } as DeserializationStrategy + deserializer.findPolymorphicSerializer(this, type) as DeserializationStrategy + } catch (it: SerializationException) { // Wrap SerializationException into JsonDecodingException to preserve position, path, and input. + // Split multiline message from private core function: + // core/commonMain/src/kotlinx/serialization/internal/AbstractPolymorphicSerializer.kt:102 + val message = it.message!!.substringBefore('\n').removeSuffix(".") + val hint = it.message!!.substringAfter('\n', missingDelimiterValue = "") + lexer.fail(message, hint = hint) + } discriminatorHolder = DiscriminatorHolder(discriminator) return actualSerializer.deserialize(this) @@ -110,11 +111,8 @@ internal open class StreamingJsonDecoder( descriptor, discriminatorHolder ) - else -> if (mode == newMode && json.configuration.explicitNulls) { - this - } else { - StreamingJsonDecoder(json, newMode, lexer, descriptor, discriminatorHolder) - } + + else -> StreamingJsonDecoder(json, newMode, lexer, descriptor, discriminatorHolder) } } @@ -125,7 +123,6 @@ internal open class StreamingJsonDecoder( if (descriptor.elementsCount == 0 && descriptor.ignoreUnknownKeys(json)) { skipLeftoverElements(descriptor) } - if (lexer.tryConsumeComma() && !json.configuration.allowTrailingComma) lexer.invalidTrailingComma("") // First consume the object so we know it's correct lexer.consumeNextToken(mode.end) // Then cleanup the path @@ -187,24 +184,21 @@ internal open class StreamingJsonDecoder( } private fun decodeMapIndex(): Int { - var hasComma = false val decodingKey = currentIndex % 2 != 0 if (decodingKey) { if (currentIndex != -1) { - hasComma = lexer.tryConsumeComma() + lexer.consumeCommaOrPeekEnd( + allowTrailing = json.configuration.allowTrailingComma, + expectedEnd = mode.end + ) } } else { lexer.consumeNextToken(COLON) } return if (lexer.canConsumeValue()) { - if (decodingKey) { - if (currentIndex == -1) lexer.require(!hasComma) { "Unexpected leading comma" } - else lexer.require(hasComma) { "Expected comma after the key-value pair" } - } ++currentIndex } else { - if (hasComma && !json.configuration.allowTrailingComma) lexer.invalidTrailingComma() CompositeDecoder.DECODE_DONE } } @@ -220,35 +214,38 @@ internal open class StreamingJsonDecoder( ) private fun decodeObjectIndex(descriptor: SerialDescriptor): Int { - // hasComma checks are required to properly react on trailing commas - var hasComma = lexer.tryConsumeComma() - while (lexer.canConsumeValue()) { // TODO: consider merging comma consumption and this check - hasComma = false + while (true) { + if (currentIndex != -1) { + lexer.consumeCommaOrPeekEnd( + allowTrailing = json.configuration.allowTrailingComma, + expectedEnd = mode.end + ) + } + + if (!lexer.canConsumeValue()) break + val key = decodeStringKey() lexer.consumeNextToken(COLON) val index = descriptor.getJsonNameIndex(json, key) - val isUnknown = if (index != UNKNOWN_NAME) { - if (configuration.coerceInputValues && coerceInputValue(descriptor, index)) { - hasComma = lexer.tryConsumeComma() - false // Known element, but coerced - } else { - elementMarker?.mark(index) - return index // Known element without coercing, return it - } - } else { - true // unknown element + currentIndex = index + + if (index == UNKNOWN_NAME) { + handleUnknown(descriptor, key) + continue } - if (isUnknown) { // slow-path for unknown keys handling - hasComma = handleUnknown(descriptor, key) + if (configuration.coerceInputValues && coerceInputValue(descriptor, index)) { + continue } + + elementMarker?.mark(index) + return index } - if (hasComma && !json.configuration.allowTrailingComma) lexer.invalidTrailingComma() return elementMarker?.nextUnmarkedIndex() ?: CompositeDecoder.DECODE_DONE } - private fun handleUnknown(descriptor: SerialDescriptor, key: String): Boolean { + private fun handleUnknown(descriptor: SerialDescriptor, key: String) { if (descriptor.ignoreUnknownKeys(json) || discriminatorHolder.trySkip(key)) { lexer.skipElement(configuration.isLenient) } else { @@ -257,17 +254,20 @@ internal open class StreamingJsonDecoder( lexer.path.popDescriptor() lexer.failOnUnknownKey(key) } - return lexer.tryConsumeComma() } private fun decodeListIndex(): Int { - // Prohibit leading comma - val hasComma = lexer.tryConsumeComma() + if (currentIndex != -1) { + lexer.consumeCommaOrPeekEnd( + allowTrailing = json.configuration.allowTrailingComma, + expectedEnd = mode.end, + entity = "array" + ) + } + return if (lexer.canConsumeValue()) { - if (currentIndex != -1 && !hasComma) lexer.fail("Expected end of the array or comma") ++currentIndex } else { - if (hasComma && !json.configuration.allowTrailingComma) lexer.invalidTrailingComma("array") CompositeDecoder.DECODE_DONE } } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt index 5f570a95e..8b755491d 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt @@ -168,17 +168,6 @@ internal abstract class AbstractJsonLexer { abstract fun consumeNextToken(): Byte - fun tryConsumeComma(): Boolean { - val current = skipWhitespaces() - val source = source - if (current >= source.length || current == -1) return false - if (source[current] == ',') { - ++currentPosition - return true - } - return false - } - protected fun isValidValueStart(c: Char): Boolean { return when (c) { '}', ']', ':', ',' -> false @@ -759,4 +748,33 @@ internal abstract class AbstractJsonLexer { currentPosition = snapshot } } + + fun consumeCommaOrPeekEnd( + allowTrailing: Boolean, + expectedEnd: Char, + entity: String = "object", + ) { + val nextToken = peekNextToken() + if (nextToken == charToTokenClass(expectedEnd)) return + if (nextToken != TC_COMMA) { + path.popDescriptor() + fail("Expected end of the $entity or comma") + } + val commaPosition = currentPosition + consumeNextToken() // consume comma + + val peeked = peekNextToken() + if (peeked == charToTokenClass(expectedEnd) && !allowTrailing) { + path.popDescriptor() + fail( + "Trailing comma before the end of JSON $entity", + position = commaPosition, + hint = "Trailing commas are non-complaint JSON and not allowed by default. Use 'allowTrailingComma = true' in 'Json {}' builder to support them." + ) + } + if (peeked == TC_COMMA) { + path.popDescriptor() + fail("Multiple consecutive commas are not allowed in JSON") + } + } }