From 908a52b1614a3178a261fbc20f9534363fd87c83 Mon Sep 17 00:00:00 2001 From: Matthew Pope <81593196+popematt@users.noreply.github.com> Date: Fri, 15 Dec 2023 10:18:51 -0800 Subject: [PATCH] Adds support for macros to IonRawBinaryWriter_1_1 (#668) --- .../java/com/amazon/ion/impl/IonWriter_1_1.kt | 6 +- .../ion/impl/bin/IonRawBinaryWriter.java | 4 +- .../ion/impl/bin/IonRawBinaryWriter_1_1.kt | 84 +++++- .../java/com/amazon/ion/impl/bin/OpCodes.java | 2 + .../com/amazon/ion/impl/bin/WriteBuffer.java | 19 +- .../impl/bin/IonRawBinaryWriterTest_1_1.kt | 262 ++++++++++++++++++ .../amazon/ion/impl/bin/WriteBufferTest.java | 18 +- 7 files changed, 368 insertions(+), 27 deletions(-) diff --git a/src/main/java/com/amazon/ion/impl/IonWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/IonWriter_1_1.kt index 4309a7f57e..ebc23d0dd6 100644 --- a/src/main/java/com/amazon/ion/impl/IonWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/IonWriter_1_1.kt @@ -119,10 +119,10 @@ interface IonWriter_1_1 { fun stepInStruct(delimited: Boolean) /** - * Steps into a stream. - * A stream is not a container in the Ion data model, but it is a container from an encoding perspective. + * Steps into an expression group. + * An expression group is not a container in the Ion data model, but it is a container from an encoding perspective. */ - fun stepInStream() + fun stepInExpressionGroup(delimited: Boolean) /** * Writes a macro invocation for the given macro name. diff --git a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java index 5d43f6c093..d1a051b4b8 100644 --- a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java +++ b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java @@ -1070,12 +1070,12 @@ private void patchSingleByteTypedOptimisticValue(final byte type, final Containe if (info.length <= 0xD) { // we fit -- overwrite the type byte - buffer.writeUInt8At(info.position - 1, type | info.length); + buffer.writeByteAt(info.position - 1, type | info.length); } else { // side patch - buffer.writeUInt8At(info.position - 1, type | 0xE); + buffer.writeByteAt(info.position - 1, type | 0xE); addPatchPoint(info, info.position, 0, info.length); } } diff --git a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt index 2e96a559e6..08d0238a11 100644 --- a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt @@ -7,6 +7,7 @@ import com.amazon.ion.impl.* import com.amazon.ion.impl.bin.IonEncoder_1_1.* import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.* import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.List +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.Macro import com.amazon.ion.impl.bin.Ion_1_1_Constants.* import com.amazon.ion.impl.bin.utf8.* import com.amazon.ion.util.* @@ -29,7 +30,7 @@ class IonRawBinaryWriter_1_1 internal constructor( SExp, Struct, Macro, - Stream, + ExpressionGroup, /** * Represents the top level stream. The [containerStack] always has [ContainerInfo] for [Top] at the bottom * of the stack so that we never have to check if [currentContainer] is null. @@ -55,7 +56,7 @@ class IonRawBinaryWriter_1_1 internal constructor( /** * Clears this [ContainerInfo] of old data and initializes it with the given new data. */ - fun reset(type: ContainerType? = null, position: Long = -1, isDelimited: Boolean = false) { + fun reset(type: ContainerType, position: Long, isDelimited: Boolean = false) { this.type = type this.isDelimited = isDelimited this.position = position @@ -336,7 +337,7 @@ class IonRawBinaryWriter_1_1 internal constructor( // To switch, we need to insert the sid-to-flexsym switch marker, unless... // if no fields have been written yet, we can just switch the op code of the struct. if (currentContainer.length == 0L) { - buffer.writeUInt8At(currentContainer.position, OpCodes.VARIABLE_LENGTH_STRUCT_WITH_FLEX_SYMS.toLong()) + buffer.writeByteAt(currentContainer.position, OpCodes.VARIABLE_LENGTH_STRUCT_WITH_FLEX_SYMS) } else { buffer.writeByte(SID_TO_FLEX_SYM_SWITCH_MARKER) currentContainer.length += 1 @@ -414,15 +415,42 @@ class IonRawBinaryWriter_1_1 internal constructor( } override fun stepInEExp(name: CharSequence) { - TODO("Not supported by the raw binary writer.") + throw UnsupportedOperationException("Binary writer requires macros to be invoked by their ID.") } override fun stepInEExp(id: Int) { - TODO("Not yet implemented") + confirm(numAnnotations == 0) { "Cannot annotate an E-Expression" } + + if (currentContainer.type == Struct && !hasFieldName) { + if (!currentContainer.usesFlexSym) switchCurrentStructToFlexSym() + buffer.writeByte(FlexInt.ZERO) + currentContainer.length++ + } + currentContainer = containerStack.push { it.reset(Macro, buffer.position()) } + if (id < 64) { + buffer.writeByte(id.toByte()) + } else { + val biasedId = id - 64 + val opCodeIdBits = FOUR_BIT_MASK and biasedId + val remainderOfId = biasedId shr 4 + buffer.writeByte((OpCodes.BIASED_E_EXPRESSION + opCodeIdBits).toByte()) + currentContainer.length += buffer.writeFlexUInt(remainderOfId) + } + + // No need to clear any of the annotation fields because we already asserted that there are no annotations + hasFieldName = false } - override fun stepInStream() { - TODO("Not yet implemented") + override fun stepInExpressionGroup(delimited: Boolean) { + confirm(numAnnotations == 0) { "Cannot annotate an expression group" } + confirm(currentContainer.type == Macro) { "Can only create an expression group in a macro invocation" } + currentContainer = containerStack.push { it.reset(ExpressionGroup, buffer.position(), delimited) } + if (delimited) { + buffer.writeByte(FlexInt.ZERO) + } else { + buffer.reserve(maxOf(1, lengthPrefixPreallocation)) + } + // No need to clear any of the annotation fields because we already asserted that there are no annotations } override fun stepOut() { @@ -448,6 +476,9 @@ class IonRawBinaryWriter_1_1 internal constructor( List, SExp, Struct -> { val contentLength = currentContainer.length if (contentLength <= 0xF && !currentContainer.usesFlexSym) { + // TODO: Right now, this is skipped if we switch to FlexSym after starting a struct + // because we have no way to differentiate a struct that started as FlexSym + // from a struct that switched to FlexSym. // Clean up any unused space that was pre-allocated. buffer.shiftBytesLeft(currentContainer.length.toInt(), lengthPrefixPreallocation) val zeroLengthOpCode = when (currentContainer.type) { @@ -456,13 +487,31 @@ class IonRawBinaryWriter_1_1 internal constructor( Struct -> OpCodes.STRUCT_SID_ZERO_LENGTH else -> TODO("Unreachable") } - buffer.writeUInt8At(currentContainer.position, zeroLengthOpCode + contentLength) + buffer.writeByteAt(currentContainer.position, zeroLengthOpCode + contentLength) } else { thisContainerTotalLength += writeCurrentContainerLength(lengthPrefixPreallocation) } } - Macro -> TODO() - Stream -> TODO() + Macro -> { + // No special action needed yet. + // However, when tag-less types and grouped parameters are implemented, + // we will need to go and update the presence bits + } + ExpressionGroup -> { + if (currentContainer.length == 0L) { + // It is not always safe to truncate like this without clearing the patch points for the + // truncated part of the buffer. However, it is safe to do so here because we can only get to + // this particular branch if this expression group is empty, ergo it contains no patch points. + buffer.truncate(currentContainer.position) + // Since 0 represents a delimited expression group, we cannot write the length. We must switch + // to an empty delimited expression group. + buffer.writeByte(FlexInt.ZERO) + buffer.writeByte(OpCodes.DELIMITED_END_MARKER) + thisContainerTotalLength = 2 + } else { + thisContainerTotalLength += writeCurrentContainerLength(lengthPrefixPreallocation, relativePosition = 0) + } + } Annotations -> TODO("Unreachable.") Top -> throw IonException("Nothing to step out of.") } @@ -478,17 +527,23 @@ class IonRawBinaryWriter_1_1 internal constructor( /** * Writes the length of the current container and returns the number of bytes needed to do so. * Transparently handles PatchPoints as necessary. + * + * @param numPreAllocatedLengthPrefixBytes the number of bytes that were pre-allocated for the length prefix of the + * current container. + * @param relativePosition the position to write the length relative to the start of the current container (which + * includes the opcode, if any). */ - private fun writeCurrentContainerLength(numPreAllocatedLengthPrefixBytes: Int): Int { + private fun writeCurrentContainerLength(numPreAllocatedLengthPrefixBytes: Int, relativePosition: Long = 1): Int { + val lengthPosition = currentContainer.position + relativePosition val lengthPrefixBytesRequired = FlexInt.flexUIntLength(currentContainer.length) if (lengthPrefixBytesRequired == numPreAllocatedLengthPrefixBytes) { // We have enough space, so write in the correct length. - buffer.writeFlexIntOrUIntAt(currentContainer.position + 1, currentContainer.length, lengthPrefixBytesRequired) + buffer.writeFlexIntOrUIntAt(lengthPosition, currentContainer.length, lengthPrefixBytesRequired) } else { addPatchPointsToStack() // All ContainerInfos are in the stack, so we know that its patchPoint is non-null. currentContainer.patchPoint.assumeNotNull().apply { - oldPosition = currentContainer.position + 1 + oldPosition = lengthPosition oldLength = numPreAllocatedLengthPrefixBytes length = currentContainer.length } @@ -503,7 +558,8 @@ class IonRawBinaryWriter_1_1 internal constructor( // If we're adding a patch point we first need to ensure that all of our ancestors (containing values) already // have a patch point. No container can be smaller than the contents, so all outer layers also require patches. - // Instead of allocating iterator, we share one iterator instance within the scope of the container stack and reset the cursor every time we track back to the ancestors. + // Instead of allocating iterator, we share one iterator instance within the scope of the container stack and + // reset the cursor every time we track back to the ancestors. val stackIterator: ListIterator = containerStack.iterator() // Walk down the stack until we find an ancestor which already has a patch point while (stackIterator.hasNext() && stackIterator.next().patchPoint == null); diff --git a/src/main/java/com/amazon/ion/impl/bin/OpCodes.java b/src/main/java/com/amazon/ion/impl/bin/OpCodes.java index 9544c65ba4..c9fd205b1e 100644 --- a/src/main/java/com/amazon/ion/impl/bin/OpCodes.java +++ b/src/main/java/com/amazon/ion/impl/bin/OpCodes.java @@ -6,6 +6,8 @@ public class OpCodes { private OpCodes() {} + public static final byte BIASED_E_EXPRESSION = 0x40; + public static final byte INTEGER_ZERO_LENGTH = 0x50; // 0x51-0x58 are additional lengths of integers. // 0x59 Reserved diff --git a/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java b/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java index 1801a8125f..3cf3fed45b 100644 --- a/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java +++ b/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java @@ -101,10 +101,11 @@ public void truncate(final long position) { final int index = index(position); final int offset = offset(position); - final Block block = blocks.get(index); - this.index = index; - block.limit = offset; - current = block; + while (this.index != index) { + blocks.remove(this.index--); + } + current = blocks.get(index); + current.limit = offset; } /** @@ -1273,7 +1274,7 @@ else if (magnitude < VAR_INT_6_OCTET_MIN_VALUE && remaining >= 5) public void writeVarUIntDirect1At(final long position, final long value) { - writeUInt8At(position, (value & VAR_INT_MASK) | VAR_INT_FINAL_OCTET_SIGNAL_MASK); + writeByteAt(position, (value & VAR_INT_MASK) | VAR_INT_FINAL_OCTET_SIGNAL_MASK); } private void writeVarUIntDirect2StraddlingAt(final int index, final int offset, final long value) @@ -1301,7 +1302,11 @@ public void writeVarUIntDirect2At(long position, long value) block.data[offset + 1] = (byte) ((value & VAR_INT_MASK) | VAR_INT_FINAL_OCTET_SIGNAL_MASK); } - public void writeUInt8At(final long position, final long value) + public void writeByteAt(final long position, final byte value) { + writeByteAt(position, (long) value); + } + + public void writeByteAt(final long position, final long value) { final int index = index(position); final int offset = offset(position); @@ -1376,7 +1381,7 @@ public void writeFlexIntOrUIntAt(final long position, final long value, final in allocateNewBlock(); } for (int i = 0; i < numBytes; i++) { - writeUInt8At(position + i, scratch[i]); + writeByteAt(position + i, scratch[i]); } } } diff --git a/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt b/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt index 45d6257eb3..dc5c6fc9ae 100644 --- a/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt +++ b/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt @@ -893,6 +893,268 @@ class IonRawBinaryWriterTest_1_1 { } } + @Test + fun `write an e-expression`() { + assertWriterOutputEquals("00") { + stepInEExp(0) + stepOut() + } + assertWriterOutputEquals("3F") { + stepInEExp(63) + stepOut() + } + } + + @ParameterizedTest + @CsvSource( + " 64, 40 01", + " 65, 41 01", + " 80, 40 03", + " 1211, 4B 8F", + " 2111, 4F FF", + " 2112, 40 02 02", + " 71376, 40 A6 45", + " 262207, 4F FE FF", + " 262208, 40 04 00 02", + "33554495, 4F FC FF FF", + ) + fun `write an e-expression with a multi-byte biased id`(id: Int, expectedBytes: String) { + assertWriterOutputEquals(expectedBytes) { + stepInEExp(id) + stepOut() + } + } + + @Test + fun `write nested e-expressions`() { + // E-Expressions don't have length prefixes, so we're putting them inside lists + // so that we can check that the length gets propagated correctly to the parent + assertWriterOutputEquals( + """ + AB | List Length 11 + 1F | Macro 31 + A9 | List Length 9 + 40 01 | Macro 64 + A6 | List Length 6 + 43 03 | Macro 83 + A3 | List Length 3 + 44 5A 02 | Macro 2468 + """ + ) { + stepInList(false) + stepInEExp(31) + stepInList(false) + stepInEExp(64) + stepInList(false) + stepInEExp(83) + stepInList(false) + stepInEExp(2468) + repeat(8) { stepOut() } + } + } + + @Test + fun `write an e-expression in the field name position of a sid struct`() { + assertWriterOutputEquals( + """ + FC | Variable Length SID Struct + 11 | Length = 8 + 15 | SID 10 + 5E | true + 01 | switch to FlexSym encoding + 01 | FlexSym Escape Byte + 1F | Macro 31 (0x1F) + 01 | FlexSym Escape Byte + 40 01 | Macro 64 + """ + ) { + stepInStruct(false) + writeFieldName(10) + writeBool(true) + stepInEExp(31) + stepOut() + stepInEExp(64) + stepOut() + stepOut() + } + } + + @Test + fun `write an e-expression in the field name position of a delimited struct`() { + assertWriterOutputEquals( + """ + F3 | Begin Delimited Struct + 01 | FlexSym Escape Byte + 1F | Macro 31 (0x1F) + 01 | FlexSym Escape Byte + F0 | End Delimiter + """ + ) { + stepInStruct(true) + stepInEExp(31) + stepOut() + stepOut() + } + } + + @Test + fun `write an e-expression in the field name position of a variable length flex-sym struct`() { + assertWriterOutputEquals( + """ + FD | Variable Length FlexSym Struct + 05 | Length = 2 + 01 | FlexSym Escape Byte + 1F | Macro 31 (0x1F) + """ + ) { + stepInStruct(false) + stepInEExp(31) + stepOut() + stepOut() + } + } + + @Test + fun `write an e-expression in the value position of a struct`() { + assertWriterOutputEquals( + """ + C2 | Struct length 2 + 03 | SID 1 + 01 | Macro 1 + """ + ) { + stepInStruct(false) + writeFieldName(1) + stepInEExp(1) + stepOut() + stepOut() + } + } + + @Test + fun `calling stepInEExp(String) should throw NotImplementedError`() { + assertThrows { + writeAsHexString { + stepInEExp("foo") + } + } + } + + @Test + fun `calling stepInEExp with an annotation should throw IonException`() { + assertWriterThrows { + writeAnnotations("foo") + stepInEExp(1) + } + } + + @Test + fun `write a delimited expression group`() { + assertWriterOutputEquals( + """ + 00 | Macro 0 + 01 | FlexUInt 0 (delimited expression group) + 5E | true + F0 | End of Expression Group + """ + ) { + stepInEExp(0) + stepInExpressionGroup(true) + writeBool(true) + stepOut() + stepOut() + } + } + + @Test + fun `write a prefixed expression group`() { + assertWriterOutputEquals( + """ + 00 | Macro 0 + 03 | Expression Group, Length = 1 + 5E | true + """ + ) { + stepInEExp(0) + stepInExpressionGroup(false) + writeBool(true) + stepOut() + stepOut() + } + } + + @Test + fun `write a prefixed expression group so long that it requires a patch point`() { + assertWriterOutputEquals( + """ + 00 | Macro 0 + FE 03 | Expression Group, Length = 255 + ${"5E ".repeat(255)} + """ + ) { + stepInEExp(0) + stepInExpressionGroup(false) + repeat(255) { writeBool(true) } + stepOut() + stepOut() + } + } + + @Test + fun `write an empty prefixed expression group`() { + // Regardless of whether we step in to a delimited or prefixed expression group, the empty expression group + // is always represented as a delimited expression group. + assertWriterOutputEquals("00 01 F0") { + stepInEExp(0) + stepInExpressionGroup(false) + stepOut() + stepOut() + } + } + + @Test + fun `write an empty delimited expression group`() { + assertWriterOutputEquals("00 01 F0") { + stepInEExp(0) + stepInExpressionGroup(true) + stepOut() + stepOut() + } + } + + @Test + fun `calling stepInExpressionGroup with an annotation should throw IonException`() { + assertWriterThrows { + stepInEExp(1) + writeAnnotations("foo") + stepInExpressionGroup(false) + } + } + + @Test + fun `calling stepInExpressionGroup while not directly in a Macro container should throw IonException`() { + assertWriterThrows { + stepInExpressionGroup(false) + } + assertWriterThrows { + stepInList(false) + stepInExpressionGroup(false) + } + assertWriterThrows { + stepInSExp(false) + stepInExpressionGroup(false) + } + assertWriterThrows { + stepInStruct(false) + stepInExpressionGroup(false) + } + assertWriterThrows { + stepInEExp(123) + stepInExpressionGroup(false) + stepInExpressionGroup(false) + } + } + /** * Writes this Ion, taken from https://amazon-ion.github.io/ion-docs/ * ``` diff --git a/src/test/java/com/amazon/ion/impl/bin/WriteBufferTest.java b/src/test/java/com/amazon/ion/impl/bin/WriteBufferTest.java index 149726323a..358bd5f00f 100644 --- a/src/test/java/com/amazon/ion/impl/bin/WriteBufferTest.java +++ b/src/test/java/com/amazon/ion/impl/bin/WriteBufferTest.java @@ -949,9 +949,25 @@ public void testBytes() throws IOException @Test public void testTruncate() throws IOException { - buf.writeBytes("ARGLEFOOBARGLEDOO".getBytes("UTF-8")); + buf.writeBytes("ARGLE".getBytes("UTF-8")); buf.truncate(3); + // Check that the expected bytes are present assertBuffer("ARG".getBytes("UTF-8")); + // ...and check that we can resume writing without any issues + buf.writeBytes("LEFOOBARGLEDOO".getBytes("UTF-8")); + assertBuffer("ARGLEFOOBARGLEDOO".getBytes("UTF-8")); + } + + @Test + public void testTruncateAcrossBlocks() throws IOException + { + buf.writeBytes("ABCDEFGHIJKLMNOPQRSTUVWXYZ".getBytes("UTF-8")); + buf.truncate(3); + // Check that the expected bytes are present + assertBuffer("ABC".getBytes("UTF-8")); + // ...and check that we can resume writing without any issues + buf.writeBytes("DEFGHIJKLMNOPQRSTUVWXYZ".getBytes("UTF-8")); + assertBuffer("ABCDEFGHIJKLMNOPQRSTUVWXYZ".getBytes("UTF-8")); } @Test