diff --git a/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java b/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java index b2394ed2a..61078e2b9 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java @@ -1171,7 +1171,7 @@ private void resetSymbolTable() { * @param newSymbols the symbols to install. */ protected void installSymbols(List newSymbols) { - if (newSymbols != null) { + if (newSymbols != null && !newSymbols.isEmpty()) { int numberOfNewSymbols = newSymbols.size(); int numberOfAvailableSlots = symbols.length - (localSymbolMaxOffset + 1); int shortfall = numberOfNewSymbols - numberOfAvailableSlots; diff --git a/src/main/java/com/amazon/ion/impl/SystemSymbols_1_1.kt b/src/main/java/com/amazon/ion/impl/SystemSymbols_1_1.kt index 8d5a77f1b..fef337a53 100644 --- a/src/main/java/com/amazon/ion/impl/SystemSymbols_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/SystemSymbols_1_1.kt @@ -97,7 +97,7 @@ enum class SystemSymbols_1_1(val id: Int, val text: String) { } @JvmStatic - private val BY_NAME = ALL_VALUES.fold(HashMap(ALL_VALUES.size)) { map, s -> + private val BY_NAME: HashMap = ALL_VALUES.fold(HashMap(ALL_VALUES.size)) { map, s -> check(map.put(s.text, s) == null) { "Duplicate system symbol text: ${s.id}=${s.text}" } map } diff --git a/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt index 0da94f06e..648fdcb7f 100644 --- a/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt @@ -110,11 +110,13 @@ internal class IonManagedWriter_1_1( } } + private var needsIVM: Boolean = false + // We take a slightly different approach here by handling the encoding context as a prior encoding context // plus a list of symbols added by the current encoding context. - /** The symbol table for the prior encoding context */ private var symbolTable: HashMap = HashMap() + /** Symbols to be interned since the prior encoding context. */ private var newSymbols: HashMap = LinkedHashMap() // Preserves insertion order. @@ -257,15 +259,14 @@ internal class IonManagedWriter_1_1( // Only called by `finish()` private fun resetEncodingContext() { - // TODO: Make sure that if a value is written after this method is called, we - // emit an IVM or empty encoding directive before the next user value - // in order to avoid writing a data stream with leaky context. if (depth != 0) throw IllegalStateException("Cannot reset the encoding context while stepped in any value.") symbolTable.clear() macroNames.clear() macrosById.clear() macroTable.clear() newMacros.clear() + + needsIVM = true } /** Helper function for writing encoding directives */ @@ -275,6 +276,13 @@ internal class IonManagedWriter_1_1( systemData.stepOut() } + /** Helper function for writing encoding directives */ + private inline fun writeSystemMacro(macro: SystemMacro, content: PrivateIonRawWriter_1_1.() -> Unit) { + systemData.stepInEExp(macro) + systemData.content() + systemData.stepOut() + } + /** * Writes an encoding directive for the current encoding context, and updates internal state accordingly. * This always appends to the current encoding context. If there is nothing to append, calling this function @@ -283,16 +291,13 @@ internal class IonManagedWriter_1_1( private fun writeEncodingDirective() { if (newSymbols.isEmpty() && newMacros.isEmpty()) return - systemData.writeAnnotations(SystemSymbols_1_1.ION_ENCODING) - writeSystemSexp { - writeSymbolTableClause() - writeMacroTableClause() - } - - // NOTE: We don't update symbolTable until after the macro_table is written because - // the new symbols aren't available until _after_ this encoding directive. + writeSymbolTableClause() symbolTable.putAll(newSymbols) newSymbols.clear() + // NOTE: Once we have emitted the symbol table update with set/add_symbols those symbols become available + // for use in set/add_macros (if relevant) + + writeMacroTableClause() macroTable.putAll(newMacros) newMacros.clear() } @@ -305,26 +310,16 @@ internal class IonManagedWriter_1_1( private fun writeSymbolTableClause() { val hasSymbolsToAdd = newSymbols.isNotEmpty() val hasSymbolsToRetain = symbolTable.isNotEmpty() - if (!hasSymbolsToAdd && !hasSymbolsToRetain) return + if (!hasSymbolsToAdd) return - writeSystemSexp { - forceNoNewlines(true) - systemData.writeSymbol(SystemSymbols_1_1.SYMBOL_TABLE) - - // Add previous symbol table - if (hasSymbolsToRetain) { - if (newSymbols.size > 0) forceNoNewlines(false) - writeSymbol(SystemSymbols_1_1.ION_ENCODING) - } + val macro = if (!hasSymbolsToRetain) SystemMacro.SetSymbols else SystemMacro.AddSymbols - // Add new symbols - if (hasSymbolsToAdd) { - stepInList(usingLengthPrefix = false) - if (newSymbols.size <= MAX_SYMBOLS_IN_SINGLE_LINE_SYMBOL_TABLE) forceNoNewlines(true) - newSymbols.forEach { (text, _) -> writeString(text) } - stepOut() - } - forceNoNewlines(true) + // Add new symbols + writeSystemMacro(macro) { + stepInExpressionGroup(usingLengthPrefix = false) + if (newSymbols.size <= MAX_SYMBOLS_IN_SINGLE_LINE_SYMBOL_TABLE) forceNoNewlines(true) + newSymbols.forEach { (text, _) -> writeString(text) } + stepOut() } systemData.forceNoNewlines(false) } @@ -337,16 +332,13 @@ internal class IonManagedWriter_1_1( private fun writeMacroTableClause() { val hasMacrosToAdd = newMacros.isNotEmpty() val hasMacrosToRetain = macroTable.isNotEmpty() - if (!hasMacrosToAdd && !hasMacrosToRetain) return + if (!hasMacrosToAdd) return - writeSystemSexp { - forceNoNewlines(true) - writeSymbol(SystemSymbols_1_1.MACRO_TABLE) - if (newMacros.size > 0) forceNoNewlines(false) - if (hasMacrosToRetain) { - writeSymbol(SystemSymbols_1_1.ION_ENCODING) - } + val macro = if (!hasMacrosToRetain) SystemMacro.SetMacros else SystemMacro.AddMacros + + writeSystemMacro(macro) { forceNoNewlines(false) + stepInExpressionGroup(usingLengthPrefix = false) newMacros.forEach { (macro, address) -> val name = macroNames[address] when (macro) { @@ -359,7 +351,7 @@ internal class IonManagedWriter_1_1( } } } - forceNoNewlines(true) + stepOut() } systemData.forceNoNewlines(false) } @@ -653,7 +645,6 @@ internal class IonManagedWriter_1_1( if (depth == 0) { // Make sure we write out any symbol tables and buffered values before the IVM finish() - systemData.writeIVM() } else { writeSymbol("\$ion_1_1") } @@ -814,6 +805,10 @@ internal class IonManagedWriter_1_1( } override fun flush() { + if (needsIVM) { + systemData.writeIVM() + needsIVM = false + } writeEncodingDirective() systemData.flush() userData.flush() diff --git a/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt b/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt index e8e29185a..7c3d23bc0 100644 --- a/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt +++ b/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt @@ -155,7 +155,7 @@ internal class IonManagedWriter_1_1_Test { fun `write an encoding directive with a non-empty macro table`() { val expected = """ $ion_1_1 - $ion_encoding::((macro_table (macro null () "foo"))) + (:$ion::set_macros (:: (macro null () "foo"))) """.trimIndent() val actual = write { @@ -169,7 +169,7 @@ internal class IonManagedWriter_1_1_Test { fun `write an e-expression by name`() { val expected = """ $ion_1_1 - $ion_encoding::((macro_table (macro a () "foo"))) + (:$ion::set_macros (:: (macro a () "foo"))) (:a) """.trimIndent() @@ -185,7 +185,7 @@ internal class IonManagedWriter_1_1_Test { fun `write an e-expression by address`() { val expected = """ $ion_1_1 - $ion_encoding::((macro_table (macro null () "foo"))) + (:$ion::set_macros (:: (macro null () "foo"))) (:0) """.trimIndent() @@ -209,7 +209,7 @@ internal class IonManagedWriter_1_1_Test { val expected = """ $ion_1_1 - $ion_encoding::((macro_table (macro null (a* b*) "foo"))) + (:$ion::set_macros (:: (macro null (a* b*) "foo"))) (:0 (::) (:: 1 2 3)) """.trimIndent() @@ -235,7 +235,7 @@ internal class IonManagedWriter_1_1_Test { fun `getOrAssignMacroAddress can add a system macro to the macro table`() { val expected = """ $ion_1_1 - $ion_encoding::((macro_table (export $ion::make_string))) + (:$ion::set_macros (:: (export $ion::make_string))) """.trimIndent() val actual = write { @@ -249,7 +249,7 @@ internal class IonManagedWriter_1_1_Test { fun `when a system macro is shadowed, it should be written using the system e-exp syntax`() { val expected = """ $ion_1_1 - $ion_encoding::((macro_table (macro make_string () "make"))) + (:$ion::set_macros (:: (macro make_string () "make"))) (:make_string) (:$ion::make_string (:: "a" b)) """.trimIndent() @@ -277,7 +277,7 @@ internal class IonManagedWriter_1_1_Test { fun `it is possible to invoke a system macro using an alias`() { val expected = """ $ion_1_1 - $ion_encoding::((macro_table (export $ion::make_string foo))) + (:$ion::set_macros (:: (export $ion::make_string foo))) (:foo (:: "a" b)) """.trimIndent() @@ -296,7 +296,7 @@ internal class IonManagedWriter_1_1_Test { fun `write an encoding directive with a non-empty symbol table`() { val expected = """ $ion_1_1 - $ion_encoding::((symbol_table ["foo"])) + (:$ion::set_symbols (:: "foo")) $1 """.trimIndent() @@ -311,9 +311,9 @@ internal class IonManagedWriter_1_1_Test { fun `calling flush() causes the next encoding directive to append to a macro table`() { val expected = """ $ion_1_1 - $ion_encoding::((macro_table (macro null () "foo"))) + (:$ion::set_macros (:: (macro null () "foo"))) (:0) - $ion_encoding::((macro_table $ion_encoding (macro null () "bar"))) + (:$ion::add_macros (:: (macro null () "bar"))) (:0) (:1) """.trimIndent() @@ -336,9 +336,9 @@ internal class IonManagedWriter_1_1_Test { fun `calling flush() causes the next encoding directive to append to the symbol table`() { val expected = """ $ion_1_1 - $ion_encoding::((symbol_table ["foo"])) + (:$ion::set_symbols (:: "foo")) $1 - $ion_encoding::((symbol_table $ion_encoding ["bar"])) + (:$ion::add_symbols (:: "bar")) $2 """.trimIndent() @@ -355,9 +355,10 @@ internal class IonManagedWriter_1_1_Test { fun `calling finish() causes the next encoding directive to NOT append to a macro table`() { val expected = """ $ion_1_1 - $ion_encoding::((macro_table (macro null () "foo"))) + (:$ion::set_macros (:: (macro null () "foo"))) (:0) - $ion_encoding::((macro_table (macro null () "bar"))) + $ion_1_1 + (:$ion::set_macros (:: (macro null () "bar"))) (:0) """.trimIndent() @@ -376,9 +377,10 @@ internal class IonManagedWriter_1_1_Test { fun `calling finish() causes the next encoding directive to NOT append to the symbol table`() { val expected = """ $ion_1_1 - $ion_encoding::((symbol_table ["foo"])) + (:$ion::set_symbols (:: "foo")) $1 - $ion_encoding::((symbol_table ["bar"])) + $ion_1_1 + (:$ion::set_symbols (:: "bar")) $1 """.trimIndent() @@ -395,9 +397,9 @@ internal class IonManagedWriter_1_1_Test { fun `adding to the macro table should preserve existing symbols`() { val expected = """ $ion_1_1 - $ion_encoding::((symbol_table ["foo"])) + (:$ion::set_symbols (:: "foo")) $1 - $ion_encoding::((symbol_table $ion_encoding) (macro_table (macro null () "foo"))) + (:$ion::set_macros (:: (macro null () "foo"))) """.trimIndent() val actual = write(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE) { @@ -413,8 +415,8 @@ internal class IonManagedWriter_1_1_Test { fun `adding to the symbol table should preserve existing macros`() { val expected = """ $ion_1_1 - $ion_encoding::((macro_table (macro null () "foo"))) - $ion_encoding::((symbol_table ["foo"]) (macro_table $ion_encoding)) + (:$ion::set_macros (:: (macro null () "foo"))) + (:$ion::set_symbols (:: "foo")) $1 (:0) """.trimIndent() @@ -691,7 +693,7 @@ internal class IonManagedWriter_1_1_Test { fun testWritingMacroDefinitions(description: String, macro: Macro, expectedSignatureAndBody: String) { val expected = """ $ion_1_1 - $ion_encoding::((macro_table (macro foo () "foo") (macro null () "bar") (macro null $expectedSignatureAndBody))) + (:$ion::set_macros (:: (macro foo () "foo") (macro null () "bar") (macro null $expectedSignatureAndBody))) """.trimIndent() val actual = write { @@ -709,24 +711,25 @@ internal class IonManagedWriter_1_1_Test { // However, this should be held loosely. val expected = """ $ion_1_1 - $ion_encoding::( - (symbol_table ["foo","bar","baz"]) - (macro_table + (:$ion::set_symbols + (:: "foo" "bar" "baz")) + (:$ion::set_macros + (:: (macro null () "foo") - (macro null (x) (.0 (%x) "bar" (..) (.. "baz")))) + (macro null (x) (.0 (%x) "bar" (..) (.. "baz"))) + ) ) $1 $2 $3 (:0) (:1) - $ion_encoding::( - (symbol_table - $ion_encoding - ["a","b","c"]) - (macro_table - $ion_encoding - (macro null () "abc")) + (:$ion::add_symbols + (:: "a" "b" "c")) + (:$ion::add_macros + (:: + (macro null () "abc") + ) ) $4 $5 @@ -773,7 +776,7 @@ internal class IonManagedWriter_1_1_Test { fun `writeObject() should write something with a macro representation`() { val expected = """ $ion_1_1 - $ion_encoding::((macro_table (macro Point2D (x y) {x:(%x),y:(%y)}))) + (:$ion::set_macros (:: (macro Point2D (x y) {x:(%x),y:(%y)}))) (:Point2D 2 4) """.trimIndent() @@ -808,7 +811,7 @@ internal class IonManagedWriter_1_1_Test { fun `writeObject() should write something with nested macro representation`() { val expected = """ $ion_1_1 - $ion_encoding::((macro_table (macro null (x*) (%x)) (macro Polygon (vertices+ compact_symbol::fill?) {vertices:[(%vertices)],fill:(.0 (%fill))}) (macro Point2D (x y) {x:(%x),y:(%y)}))) + (:$ion::set_macros (:: (macro null (x*) (%x)) (macro Polygon (vertices+ compact_symbol::fill?) {vertices:[(%vertices)],fill:(.0 (%fill))}) (macro Point2D (x y) {x:(%x),y:(%y)}))) (:Polygon (:: (:Point2D 0 0) (:Point2D 0 1) (:Point2D 1 1) (:Point2D 1 0)) Blue) """.trimIndent()