-
Notifications
You must be signed in to change notification settings - Fork 111
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adds support for macros to IonRawBinaryWriter_1_1 #668
Changes from all commits
2db9a46
c2774ef
a8d1653
08ff18c
77b5f5e
0239f79
195ca42
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ import com.amazon.ion.impl.* | |
import com.amazon.ion.impl.bin.IonEncoder_1_1.* | ||
import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.* | ||
import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.List | ||
import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.Macro | ||
import com.amazon.ion.impl.bin.Ion_1_1_Constants.* | ||
import com.amazon.ion.impl.bin.utf8.* | ||
import com.amazon.ion.util.* | ||
|
@@ -29,7 +30,7 @@ class IonRawBinaryWriter_1_1 internal constructor( | |
SExp, | ||
Struct, | ||
Macro, | ||
Stream, | ||
ExpressionGroup, | ||
/** | ||
* Represents the top level stream. The [containerStack] always has [ContainerInfo] for [Top] at the bottom | ||
* of the stack so that we never have to check if [currentContainer] is null. | ||
|
@@ -55,7 +56,7 @@ class IonRawBinaryWriter_1_1 internal constructor( | |
/** | ||
* Clears this [ContainerInfo] of old data and initializes it with the given new data. | ||
*/ | ||
fun reset(type: ContainerType? = null, position: Long = -1, isDelimited: Boolean = false) { | ||
fun reset(type: ContainerType, position: Long, isDelimited: Boolean = false) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ Incidental change—I realize that the default values are always supplied for both |
||
this.type = type | ||
this.isDelimited = isDelimited | ||
this.position = position | ||
|
@@ -336,7 +337,7 @@ class IonRawBinaryWriter_1_1 internal constructor( | |
// To switch, we need to insert the sid-to-flexsym switch marker, unless... | ||
// if no fields have been written yet, we can just switch the op code of the struct. | ||
if (currentContainer.length == 0L) { | ||
buffer.writeUInt8At(currentContainer.position, OpCodes.VARIABLE_LENGTH_STRUCT_WITH_FLEX_SYMS.toLong()) | ||
buffer.writeByteAt(currentContainer.position, OpCodes.VARIABLE_LENGTH_STRUCT_WITH_FLEX_SYMS) | ||
} else { | ||
buffer.writeByte(SID_TO_FLEX_SYM_SWITCH_MARKER) | ||
currentContainer.length += 1 | ||
|
@@ -414,15 +415,42 @@ class IonRawBinaryWriter_1_1 internal constructor( | |
} | ||
|
||
override fun stepInEExp(name: CharSequence) { | ||
TODO("Not supported by the raw binary writer.") | ||
throw UnsupportedOperationException("Binary writer requires macros to be invoked by their ID.") | ||
} | ||
|
||
override fun stepInEExp(id: Int) { | ||
TODO("Not yet implemented") | ||
confirm(numAnnotations == 0) { "Cannot annotate an E-Expression" } | ||
|
||
if (currentContainer.type == Struct && !hasFieldName) { | ||
if (!currentContainer.usesFlexSym) switchCurrentStructToFlexSym() | ||
buffer.writeByte(FlexInt.ZERO) | ||
currentContainer.length++ | ||
} | ||
currentContainer = containerStack.push { it.reset(Macro, buffer.position()) } | ||
if (id < 64) { | ||
buffer.writeByte(id.toByte()) | ||
} else { | ||
val biasedId = id - 64 | ||
val opCodeIdBits = FOUR_BIT_MASK and biasedId | ||
val remainderOfId = biasedId shr 4 | ||
buffer.writeByte((OpCodes.BIASED_E_EXPRESSION + opCodeIdBits).toByte()) | ||
currentContainer.length += buffer.writeFlexUInt(remainderOfId) | ||
} | ||
|
||
// No need to clear any of the annotation fields because we already asserted that there are no annotations | ||
hasFieldName = false | ||
} | ||
|
||
override fun stepInStream() { | ||
TODO("Not yet implemented") | ||
override fun stepInExpressionGroup(delimited: Boolean) { | ||
confirm(numAnnotations == 0) { "Cannot annotate an expression group" } | ||
confirm(currentContainer.type == Macro) { "Can only create an expression group in a macro invocation" } | ||
currentContainer = containerStack.push { it.reset(ExpressionGroup, buffer.position(), delimited) } | ||
if (delimited) { | ||
buffer.writeByte(FlexInt.ZERO) | ||
} else { | ||
buffer.reserve(maxOf(1, lengthPrefixPreallocation)) | ||
} | ||
// No need to clear any of the annotation fields because we already asserted that there are no annotations | ||
} | ||
|
||
override fun stepOut() { | ||
|
@@ -448,6 +476,9 @@ class IonRawBinaryWriter_1_1 internal constructor( | |
List, SExp, Struct -> { | ||
val contentLength = currentContainer.length | ||
if (contentLength <= 0xF && !currentContainer.usesFlexSym) { | ||
// TODO: Right now, this is skipped if we switch to FlexSym after starting a struct | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ This is incidental. I was aware of this when I implemented structs, but I had since forgotten it, and then I rediscovered this while I was writing tests for E-Exps. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you provide a little more detail here? What are the implications? That we can't reclaim space for FlexSym structs? Why not? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The scenario is when we have a struct that starts out as a SID struct, has at least one SID field before switching to FlexSyms, and has a total length of less than 16 bytes. The consequence of this limitation is that we're wasting a byte. E.g.:
Ideally, we could write this starting with It's not difficult to solve this—it just requires a way to check how the struct started out and a few more branches for the different scenarios—but I didn't really want to address it while the struct encoding is still in a bit of flux. |
||
// because we have no way to differentiate a struct that started as FlexSym | ||
// from a struct that switched to FlexSym. | ||
// Clean up any unused space that was pre-allocated. | ||
buffer.shiftBytesLeft(currentContainer.length.toInt(), lengthPrefixPreallocation) | ||
val zeroLengthOpCode = when (currentContainer.type) { | ||
|
@@ -456,13 +487,31 @@ class IonRawBinaryWriter_1_1 internal constructor( | |
Struct -> OpCodes.STRUCT_SID_ZERO_LENGTH | ||
else -> TODO("Unreachable") | ||
} | ||
buffer.writeUInt8At(currentContainer.position, zeroLengthOpCode + contentLength) | ||
buffer.writeByteAt(currentContainer.position, zeroLengthOpCode + contentLength) | ||
} else { | ||
thisContainerTotalLength += writeCurrentContainerLength(lengthPrefixPreallocation) | ||
} | ||
} | ||
Macro -> TODO() | ||
Stream -> TODO() | ||
Macro -> { | ||
// No special action needed yet. | ||
// However, when tag-less types and grouped parameters are implemented, | ||
// we will need to go and update the presence bits | ||
} | ||
ExpressionGroup -> { | ||
if (currentContainer.length == 0L) { | ||
// It is not always safe to truncate like this without clearing the patch points for the | ||
// truncated part of the buffer. However, it is safe to do so here because we can only get to | ||
// this particular branch if this expression group is empty, ergo it contains no patch points. | ||
buffer.truncate(currentContainer.position) | ||
// Since 0 represents a delimited expression group, we cannot write the length. We must switch | ||
// to an empty delimited expression group. | ||
buffer.writeByte(FlexInt.ZERO) | ||
buffer.writeByte(OpCodes.DELIMITED_END_MARKER) | ||
thisContainerTotalLength = 2 | ||
} else { | ||
thisContainerTotalLength += writeCurrentContainerLength(lengthPrefixPreallocation, relativePosition = 0) | ||
} | ||
} | ||
Annotations -> TODO("Unreachable.") | ||
Top -> throw IonException("Nothing to step out of.") | ||
} | ||
|
@@ -478,17 +527,23 @@ class IonRawBinaryWriter_1_1 internal constructor( | |
/** | ||
* Writes the length of the current container and returns the number of bytes needed to do so. | ||
* Transparently handles PatchPoints as necessary. | ||
* | ||
* @param numPreAllocatedLengthPrefixBytes the number of bytes that were pre-allocated for the length prefix of the | ||
* current container. | ||
* @param relativePosition the position to write the length relative to the start of the current container (which | ||
* includes the opcode, if any). | ||
*/ | ||
private fun writeCurrentContainerLength(numPreAllocatedLengthPrefixBytes: Int): Int { | ||
private fun writeCurrentContainerLength(numPreAllocatedLengthPrefixBytes: Int, relativePosition: Long = 1): Int { | ||
val lengthPosition = currentContainer.position + relativePosition | ||
val lengthPrefixBytesRequired = FlexInt.flexUIntLength(currentContainer.length) | ||
if (lengthPrefixBytesRequired == numPreAllocatedLengthPrefixBytes) { | ||
// We have enough space, so write in the correct length. | ||
buffer.writeFlexIntOrUIntAt(currentContainer.position + 1, currentContainer.length, lengthPrefixBytesRequired) | ||
buffer.writeFlexIntOrUIntAt(lengthPosition, currentContainer.length, lengthPrefixBytesRequired) | ||
} else { | ||
addPatchPointsToStack() | ||
// All ContainerInfos are in the stack, so we know that its patchPoint is non-null. | ||
currentContainer.patchPoint.assumeNotNull().apply { | ||
oldPosition = currentContainer.position + 1 | ||
oldPosition = lengthPosition | ||
oldLength = numPreAllocatedLengthPrefixBytes | ||
length = currentContainer.length | ||
} | ||
|
@@ -503,7 +558,8 @@ class IonRawBinaryWriter_1_1 internal constructor( | |
|
||
// If we're adding a patch point we first need to ensure that all of our ancestors (containing values) already | ||
// have a patch point. No container can be smaller than the contents, so all outer layers also require patches. | ||
// Instead of allocating iterator, we share one iterator instance within the scope of the container stack and reset the cursor every time we track back to the ancestors. | ||
// Instead of allocating iterator, we share one iterator instance within the scope of the container stack and | ||
// reset the cursor every time we track back to the ancestors. | ||
Comment on lines
+561
to
+562
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ Incidental formatting change so that the line isn't ridiculously long. |
||
val stackIterator: ListIterator<ContainerInfo> = containerStack.iterator() | ||
// Walk down the stack until we find an ancestor which already has a patch point | ||
while (stackIterator.hasNext() && stackIterator.next().patchPoint == null); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -100,10 +100,11 @@ public void truncate(final long position) | |
{ | ||
final int index = index(position); | ||
final int offset = offset(position); | ||
final Block block = blocks.get(index); | ||
this.index = index; | ||
block.limit = offset; | ||
current = block; | ||
while (this.index != index) { | ||
blocks.remove(this.index--); | ||
} | ||
Comment on lines
+103
to
+105
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ This fixes a bug in
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How long has this bug existed? Does it require patching in master and/or previous versions? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's been there all along, but I haven't looked into the potential impact. Since it's been there all along, it's even possible that the current implementation has a way to avoid the buggy behavior by cleaning the blocks at some other time. |
||
current = blocks.get(index); | ||
current.limit = offset; | ||
} | ||
|
||
/** | ||
|
@@ -1272,7 +1273,7 @@ else if (magnitude < VAR_INT_6_OCTET_MIN_VALUE && remaining >= 5) | |
|
||
public void writeVarUIntDirect1At(final long position, final long value) | ||
{ | ||
writeUInt8At(position, (value & VAR_INT_MASK) | VAR_INT_FINAL_OCTET_SIGNAL_MASK); | ||
writeByteAt(position, (value & VAR_INT_MASK) | VAR_INT_FINAL_OCTET_SIGNAL_MASK); | ||
} | ||
|
||
private void writeVarUIntDirect2StraddlingAt(final int index, final int offset, final long value) | ||
|
@@ -1300,7 +1301,11 @@ public void writeVarUIntDirect2At(long position, long value) | |
block.data[offset + 1] = (byte) ((value & VAR_INT_MASK) | VAR_INT_FINAL_OCTET_SIGNAL_MASK); | ||
} | ||
|
||
public void writeUInt8At(final long position, final long value) | ||
public void writeByteAt(final long position, final byte value) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ Convenience overload. If it's a perf problem we can remove it, but I bet the JIT is smart enough to optimize this out. |
||
writeByteAt(position, (long) value); | ||
} | ||
|
||
public void writeByteAt(final long position, final long value) | ||
{ | ||
final int index = index(position); | ||
final int offset = offset(position); | ||
|
@@ -1375,7 +1380,7 @@ public void writeFlexIntOrUIntAt(final long position, final long value, final in | |
allocateNewBlock(); | ||
} | ||
for (int i = 0; i < numBytes; i++) { | ||
writeUInt8At(position + i, scratch[i]); | ||
writeByteAt(position + i, scratch[i]); | ||
} | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🗺️ The
writeUInt8At
method got renamed towriteByteAt
because I found it misleading to usewriteUInt8
to writeFlexInt.ZERO
. I checked all usages of the method, and the new namewriteByteAt
works for all of them. None of the existing usages were specifically trying to write an 8-bit integer; rather they were writing either a sentinel (such as an op code) or 8 bits of a larger value.