Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use add/set_macros/symbols in ManagedWriter #987

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1171,7 +1171,7 @@ private void resetSymbolTable() {
* @param newSymbols the symbols to install.
*/
protected void installSymbols(List<String> newSymbols) {
if (newSymbols != null) {
if (newSymbols != null && !newSymbols.isEmpty()) {
int numberOfNewSymbols = newSymbols.size();
int numberOfAvailableSlots = symbols.length - (localSymbolMaxOffset + 1);
int shortfall = numberOfNewSymbols - numberOfAvailableSlots;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/amazon/ion/impl/SystemSymbols_1_1.kt
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ enum class SystemSymbols_1_1(val id: Int, val text: String) {
}

@JvmStatic
private val BY_NAME = ALL_VALUES.fold(HashMap<String, SystemSymbols_1_1>(ALL_VALUES.size)) { map, s ->
private val BY_NAME: HashMap<String, SystemSymbols_1_1> = ALL_VALUES.fold(HashMap(ALL_VALUES.size)) { map, s ->
check(map.put(s.text, s) == null) { "Duplicate system symbol text: ${s.id}=${s.text}" }
map
}
Expand Down
75 changes: 35 additions & 40 deletions src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,13 @@ internal class IonManagedWriter_1_1(
}
}

private var needsIVM: Boolean = false

// We take a slightly different approach here by handling the encoding context as a prior encoding context
// plus a list of symbols added by the current encoding context.

/** The symbol table for the prior encoding context */
private var symbolTable: HashMap<String, Int> = HashMap()

/** Symbols to be interned since the prior encoding context. */
private var newSymbols: HashMap<String, Int> = LinkedHashMap() // Preserves insertion order.

Expand Down Expand Up @@ -257,15 +259,14 @@ internal class IonManagedWriter_1_1(

// Only called by `finish()`
private fun resetEncodingContext() {
// TODO: Make sure that if a value is written after this method is called, we
// emit an IVM or empty encoding directive before the next user value
// in order to avoid writing a data stream with leaky context.
if (depth != 0) throw IllegalStateException("Cannot reset the encoding context while stepped in any value.")
symbolTable.clear()
macroNames.clear()
macrosById.clear()
macroTable.clear()
newMacros.clear()

needsIVM = true
popematt marked this conversation as resolved.
Show resolved Hide resolved
}

/** Helper function for writing encoding directives */
Expand All @@ -275,6 +276,13 @@ internal class IonManagedWriter_1_1(
systemData.stepOut()
}

/** Helper function for writing encoding directives */
private inline fun writeSystemMacro(macro: SystemMacro, content: PrivateIonRawWriter_1_1.() -> Unit) {
systemData.stepInEExp(macro)
systemData.content()
systemData.stepOut()
}

/**
* Writes an encoding directive for the current encoding context, and updates internal state accordingly.
* This always appends to the current encoding context. If there is nothing to append, calling this function
Expand All @@ -283,16 +291,13 @@ internal class IonManagedWriter_1_1(
private fun writeEncodingDirective() {
if (newSymbols.isEmpty() && newMacros.isEmpty()) return

systemData.writeAnnotations(SystemSymbols_1_1.ION_ENCODING)
writeSystemSexp {
writeSymbolTableClause()
writeMacroTableClause()
}

// NOTE: We don't update symbolTable until after the macro_table is written because
// the new symbols aren't available until _after_ this encoding directive.
writeSymbolTableClause()
symbolTable.putAll(newSymbols)
newSymbols.clear()
// NOTE: Once we have emitted the symbol table update with set/add_symbols those symbols become available
// for use in set/add_macros (if relevant)

writeMacroTableClause()
macroTable.putAll(newMacros)
newMacros.clear()
}
Expand All @@ -305,26 +310,16 @@ internal class IonManagedWriter_1_1(
private fun writeSymbolTableClause() {
val hasSymbolsToAdd = newSymbols.isNotEmpty()
val hasSymbolsToRetain = symbolTable.isNotEmpty()
if (!hasSymbolsToAdd && !hasSymbolsToRetain) return
if (!hasSymbolsToAdd) return

writeSystemSexp {
forceNoNewlines(true)
systemData.writeSymbol(SystemSymbols_1_1.SYMBOL_TABLE)

// Add previous symbol table
if (hasSymbolsToRetain) {
if (newSymbols.size > 0) forceNoNewlines(false)
writeSymbol(SystemSymbols_1_1.ION_ENCODING)
}
val macro = if (!hasSymbolsToRetain) SystemMacro.SetSymbols else SystemMacro.AddSymbols

// Add new symbols
if (hasSymbolsToAdd) {
stepInList(usingLengthPrefix = false)
if (newSymbols.size <= MAX_SYMBOLS_IN_SINGLE_LINE_SYMBOL_TABLE) forceNoNewlines(true)
newSymbols.forEach { (text, _) -> writeString(text) }
stepOut()
}
forceNoNewlines(true)
// Add new symbols
writeSystemMacro(macro) {
stepInExpressionGroup(usingLengthPrefix = false)
if (newSymbols.size <= MAX_SYMBOLS_IN_SINGLE_LINE_SYMBOL_TABLE) forceNoNewlines(true)
newSymbols.forEach { (text, _) -> writeString(text) }
stepOut()
}
systemData.forceNoNewlines(false)
}
Expand All @@ -337,16 +332,13 @@ internal class IonManagedWriter_1_1(
private fun writeMacroTableClause() {
val hasMacrosToAdd = newMacros.isNotEmpty()
val hasMacrosToRetain = macroTable.isNotEmpty()
if (!hasMacrosToAdd && !hasMacrosToRetain) return
if (!hasMacrosToAdd) return

writeSystemSexp {
forceNoNewlines(true)
writeSymbol(SystemSymbols_1_1.MACRO_TABLE)
if (newMacros.size > 0) forceNoNewlines(false)
if (hasMacrosToRetain) {
writeSymbol(SystemSymbols_1_1.ION_ENCODING)
}
val macro = if (!hasMacrosToRetain) SystemMacro.SetMacros else SystemMacro.AddMacros

writeSystemMacro(macro) {
forceNoNewlines(false)
stepInExpressionGroup(usingLengthPrefix = false)
newMacros.forEach { (macro, address) ->
val name = macroNames[address]
when (macro) {
Expand All @@ -359,7 +351,7 @@ internal class IonManagedWriter_1_1(
}
}
}
forceNoNewlines(true)
stepOut()
}
systemData.forceNoNewlines(false)
}
Expand Down Expand Up @@ -653,7 +645,6 @@ internal class IonManagedWriter_1_1(
if (depth == 0) {
// Make sure we write out any symbol tables and buffered values before the IVM
finish()
systemData.writeIVM()
} else {
writeSymbol("\$ion_1_1")
}
Expand Down Expand Up @@ -814,6 +805,10 @@ internal class IonManagedWriter_1_1(
}

override fun flush() {
if (needsIVM) {
systemData.writeIVM()
needsIVM = false
}
writeEncodingDirective()
systemData.flush()
userData.flush()
Expand Down
71 changes: 37 additions & 34 deletions src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ internal class IonManagedWriter_1_1_Test {
fun `write an encoding directive with a non-empty macro table`() {
val expected = """
$ion_1_1
$ion_encoding::((macro_table (macro null () "foo")))
(:$ion::set_macros (:: (macro null () "foo")))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In these cases, the $ion:: prefix is not required, right, since the IVM installs the system module? This can be out of scope for this PR, but we might as well refine this at some point to be as succinct as possible.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it's the raw writer that is determining this behavior. I'd also like to make this more succinct.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's the managed writer that needs to determine the behavior because the raw writer should not know whether something is in the macro table or not.

The stepInEExp(SystemMacro) method in the raw writer must always write a system e-expression using the specific text or binary syntax for a system e-exp (which is the current behavior).

To get rid fo the unnecessary $ion prefix, the managed writer need to check whether the desired system macro is in the encoding module. If it is in the encoding module, then the managed writer can use stepInEExp(CharSequence) or stepInEExp(Int, Boolean, Macro), similar to the logic in startMacro (see below). If the system macro is not in the encoding module, then the managed writer needs to call stepInEExp(SystemMacro) (which results in the qualified invocation in text or the EF opcode in binary).

private fun startMacro(name: String?, address: Int, definition: Macro) {
val useNames = options.eExpressionIdentifierStrategy == ManagedWriterOptions_1_1.EExpressionIdentifierStrategy.BY_NAME
if (useNames && name != null) {
userData.stepInEExp(name)
} else {
val includeLengthPrefix = options.writeLengthPrefix(ContainerType.EEXP, depth + 1)
userData.stepInEExp(address, includeLengthPrefix, definition)
}
}

""".trimIndent()

val actual = write {
Expand All @@ -169,7 +169,7 @@ internal class IonManagedWriter_1_1_Test {
fun `write an e-expression by name`() {
val expected = """
$ion_1_1
$ion_encoding::((macro_table (macro a () "foo")))
(:$ion::set_macros (:: (macro a () "foo")))
(:a)
""".trimIndent()

Expand All @@ -185,7 +185,7 @@ internal class IonManagedWriter_1_1_Test {
fun `write an e-expression by address`() {
val expected = """
$ion_1_1
$ion_encoding::((macro_table (macro null () "foo")))
(:$ion::set_macros (:: (macro null () "foo")))
(:0)
""".trimIndent()

Expand All @@ -209,7 +209,7 @@ internal class IonManagedWriter_1_1_Test {

val expected = """
$ion_1_1
$ion_encoding::((macro_table (macro null (a* b*) "foo")))
(:$ion::set_macros (:: (macro null (a* b*) "foo")))
(:0 (::) (:: 1 2 3))
""".trimIndent()

Expand All @@ -235,7 +235,7 @@ internal class IonManagedWriter_1_1_Test {
fun `getOrAssignMacroAddress can add a system macro to the macro table`() {
val expected = """
$ion_1_1
$ion_encoding::((macro_table (export $ion::make_string)))
(:$ion::set_macros (:: (export $ion::make_string)))
""".trimIndent()

val actual = write {
Expand All @@ -249,7 +249,7 @@ internal class IonManagedWriter_1_1_Test {
fun `when a system macro is shadowed, it should be written using the system e-exp syntax`() {
val expected = """
$ion_1_1
$ion_encoding::((macro_table (macro make_string () "make")))
(:$ion::set_macros (:: (macro make_string () "make")))
(:make_string)
(:$ion::make_string (:: "a" b))
""".trimIndent()
Expand Down Expand Up @@ -277,7 +277,7 @@ internal class IonManagedWriter_1_1_Test {
fun `it is possible to invoke a system macro using an alias`() {
val expected = """
$ion_1_1
$ion_encoding::((macro_table (export $ion::make_string foo)))
(:$ion::set_macros (:: (export $ion::make_string foo)))
(:foo (:: "a" b))
""".trimIndent()

Expand All @@ -296,7 +296,7 @@ internal class IonManagedWriter_1_1_Test {
fun `write an encoding directive with a non-empty symbol table`() {
val expected = """
$ion_1_1
$ion_encoding::((symbol_table ["foo"]))
(:$ion::set_symbols (:: "foo"))
$1
""".trimIndent()

Expand All @@ -311,9 +311,9 @@ internal class IonManagedWriter_1_1_Test {
fun `calling flush() causes the next encoding directive to append to a macro table`() {
val expected = """
$ion_1_1
$ion_encoding::((macro_table (macro null () "foo")))
(:$ion::set_macros (:: (macro null () "foo")))
(:0)
$ion_encoding::((macro_table $ion_encoding (macro null () "bar")))
(:$ion::add_macros (:: (macro null () "bar")))
(:0)
(:1)
""".trimIndent()
Expand All @@ -336,9 +336,9 @@ internal class IonManagedWriter_1_1_Test {
fun `calling flush() causes the next encoding directive to append to the symbol table`() {
val expected = """
$ion_1_1
$ion_encoding::((symbol_table ["foo"]))
(:$ion::set_symbols (:: "foo"))
$1
$ion_encoding::((symbol_table $ion_encoding ["bar"]))
(:$ion::add_symbols (:: "bar"))
$2
""".trimIndent()

Expand All @@ -355,9 +355,10 @@ internal class IonManagedWriter_1_1_Test {
fun `calling finish() causes the next encoding directive to NOT append to a macro table`() {
val expected = """
$ion_1_1
$ion_encoding::((macro_table (macro null () "foo")))
(:$ion::set_macros (:: (macro null () "foo")))
(:0)
$ion_encoding::((macro_table (macro null () "bar")))
$ion_1_1
(:$ion::set_macros (:: (macro null () "bar")))
(:0)
""".trimIndent()

Expand All @@ -376,9 +377,10 @@ internal class IonManagedWriter_1_1_Test {
fun `calling finish() causes the next encoding directive to NOT append to the symbol table`() {
val expected = """
$ion_1_1
$ion_encoding::((symbol_table ["foo"]))
(:$ion::set_symbols (:: "foo"))
$1
$ion_encoding::((symbol_table ["bar"]))
$ion_1_1
(:$ion::set_symbols (:: "bar"))
$1
""".trimIndent()

Expand All @@ -395,9 +397,9 @@ internal class IonManagedWriter_1_1_Test {
fun `adding to the macro table should preserve existing symbols`() {
val expected = """
$ion_1_1
$ion_encoding::((symbol_table ["foo"]))
(:$ion::set_symbols (:: "foo"))
$1
$ion_encoding::((symbol_table $ion_encoding) (macro_table (macro null () "foo")))
(:$ion::set_macros (:: (macro null () "foo")))
""".trimIndent()

val actual = write(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE) {
Expand All @@ -413,8 +415,8 @@ internal class IonManagedWriter_1_1_Test {
fun `adding to the symbol table should preserve existing macros`() {
val expected = """
$ion_1_1
$ion_encoding::((macro_table (macro null () "foo")))
$ion_encoding::((symbol_table ["foo"]) (macro_table $ion_encoding))
(:$ion::set_macros (:: (macro null () "foo")))
(:$ion::set_symbols (:: "foo"))
$1
(:0)
""".trimIndent()
Expand Down Expand Up @@ -691,7 +693,7 @@ internal class IonManagedWriter_1_1_Test {
fun testWritingMacroDefinitions(description: String, macro: Macro, expectedSignatureAndBody: String) {
val expected = """
$ion_1_1
$ion_encoding::((macro_table (macro foo () "foo") (macro null () "bar") (macro null $expectedSignatureAndBody)))
(:$ion::set_macros (:: (macro foo () "foo") (macro null () "bar") (macro null $expectedSignatureAndBody)))
""".trimIndent()

val actual = write {
Expand All @@ -709,24 +711,25 @@ internal class IonManagedWriter_1_1_Test {
// However, this should be held loosely.
val expected = """
$ion_1_1
$ion_encoding::(
(symbol_table ["foo","bar","baz"])
(macro_table
(:$ion::set_symbols
(:: "foo" "bar" "baz"))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice if we could have it write it like this instead, but we can leave that for another day.

(:set_symbols "foo" "bar" "baz")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked into that briefly- in some cases it is being written that way, so it looks like there's some more thought needed on the tuning or something else I missed.

(:$ion::set_macros
(::
(macro null () "foo")
(macro null (x) (.0 (%x) "bar" (..) (.. "baz"))))
(macro null (x) (.0 (%x) "bar" (..) (.. "baz")))
)
)
$1
$2
$3
(:0)
(:1)
$ion_encoding::(
(symbol_table
$ion_encoding
["a","b","c"])
(macro_table
$ion_encoding
(macro null () "abc"))
(:$ion::add_symbols
(:: "a" "b" "c"))
(:$ion::add_macros
(::
(macro null () "abc")
)
)
$4
$5
Expand Down Expand Up @@ -773,7 +776,7 @@ internal class IonManagedWriter_1_1_Test {
fun `writeObject() should write something with a macro representation`() {
val expected = """
$ion_1_1
$ion_encoding::((macro_table (macro Point2D (x y) {x:(%x),y:(%y)})))
(:$ion::set_macros (:: (macro Point2D (x y) {x:(%x),y:(%y)})))
(:Point2D 2 4)
""".trimIndent()

Expand Down Expand Up @@ -808,7 +811,7 @@ internal class IonManagedWriter_1_1_Test {
fun `writeObject() should write something with nested macro representation`() {
val expected = """
$ion_1_1
$ion_encoding::((macro_table (macro null (x*) (%x)) (macro Polygon (vertices+ compact_symbol::fill?) {vertices:[(%vertices)],fill:(.0 (%fill))}) (macro Point2D (x y) {x:(%x),y:(%y)})))
(:$ion::set_macros (:: (macro null (x*) (%x)) (macro Polygon (vertices+ compact_symbol::fill?) {vertices:[(%vertices)],fill:(.0 (%fill))}) (macro Point2D (x y) {x:(%x),y:(%y)})))
(:Polygon (:: (:Point2D 0 0) (:Point2D 0 1) (:Point2D 1 1) (:Point2D 1 0)) Blue)
""".trimIndent()

Expand Down
Loading