Skip to content

Commit

Permalink
Adds read support for system symbols
Browse files Browse the repository at this point in the history
  • Loading branch information
popematt committed Sep 24, 2024
1 parent 4b373e5 commit cf69220
Show file tree
Hide file tree
Showing 15 changed files with 438 additions and 185 deletions.
106 changes: 71 additions & 35 deletions src/main/java/com/amazon/ion/impl/IonCursorBinary.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,12 @@
import static com.amazon.ion.impl.IonTypeID.DELIMITED_END_ID;
import static com.amazon.ion.impl.IonTypeID.ONE_ANNOTATION_FLEX_SYM_LOWER_NIBBLE_1_1;
import static com.amazon.ion.impl.IonTypeID.ONE_ANNOTATION_SID_LOWER_NIBBLE_1_1;
import static com.amazon.ion.impl.IonTypeID.SYSTEM_MACRO_INVOCATION_ID;
import static com.amazon.ion.impl.IonTypeID.SYSTEM_SYMBOL_VALUE;
import static com.amazon.ion.impl.IonTypeID.TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1;
import static com.amazon.ion.impl.IonTypeID.TWO_ANNOTATION_SIDS_LOWER_NIBBLE_1_1;
import static com.amazon.ion.impl.IonTypeID.TYPE_IDS_1_1;
import static com.amazon.ion.impl.bin.Ion_1_1_Constants.FLEX_SYM_SYSTEM_SYMBOL_OFFSET;
import static com.amazon.ion.util.IonStreamUtils.throwAsIonException;

/**
Expand Down Expand Up @@ -306,7 +308,7 @@ private static class ArgumentGroupMarker {
/**
* The major version of the Ion encoding currently being read.
*/
private int majorVersion = -1;
private int majorVersion = 1;

/**
* The minor version of the Ion encoding currently being read.
Expand Down Expand Up @@ -1490,24 +1492,23 @@ private long uncheckedReadFlexInt_1_1() {
* Marker's endIndex is set to the symbol ID value and its startIndex is set to -1. When this FlexSym wraps a
* delimited end marker, neither the Marker's startIndex nor its endIndex is set.
* @param markerToSet the marker to populate.
* @return the symbol ID value if one was present, otherwise -1.
* @return the user-space symbol ID value if one was present, otherwise -1.
*/
private long uncheckedReadFlexSym_1_1(Marker markerToSet) {
long result = uncheckedReadFlexInt_1_1();
if (result == 0) {
int nextByte = buffer[(int)(peekIndex++)];
if (nextByte == OpCodes.INLINE_SYMBOL_ZERO_LENGTH) {
// TODO: We could pretend $0 is a system symbol and consolidate some of the branches here. Is it worth it?
if (nextByte == FLEX_SYM_SYSTEM_SYMBOL_OFFSET) {
// Symbol zero.
markerToSet.endIndex = 0;
return 0;
}
if (nextByte == OpCodes.STRING_ZERO_LENGTH) {
// Inline symbol with zero length.
markerToSet.startIndex = peekIndex;
markerToSet.endIndex = peekIndex;
if (nextByte > FLEX_SYM_SYSTEM_SYMBOL_OFFSET || nextByte <= (byte) (FLEX_SYM_SYSTEM_SYMBOL_OFFSET + Byte.MAX_VALUE)) {
setSystemSymbolMarker(markerToSet, (byte)(nextByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET));
return -1;
} else if (nextByte != OpCodes.DELIMITED_END_MARKER) {
throw new IonException("FlexSym 0 may only precede symbol zero, empty string, or delimited end.");
throw new IonException("FlexSym 0 may only precede symbol zero, system symbol, or delimited end.");
}
markerToSet.typeId = IonTypeID.DELIMITED_END_ID;
return -1;
Expand All @@ -1523,6 +1524,12 @@ private long uncheckedReadFlexSym_1_1(Marker markerToSet) {
return result;
}

private static boolean isFlexSymSystemSymbol(int byteAfterEscapeCode) {
// TODO: We could pretend $0 is a system symbol and consolidate some of the branches elsewhere. Is it worth it?
return byteAfterEscapeCode > FLEX_SYM_SYSTEM_SYMBOL_OFFSET
|| byteAfterEscapeCode <= (byte) (FLEX_SYM_SYSTEM_SYMBOL_OFFSET + Byte.MAX_VALUE);
}

/**
* Reads a FlexInt into a long, ensuring enough space is available in the buffer. After this method returns false,
* `peekIndex` points to the first byte after the end of the FlexInt and `markerToSet.endIndex` contains the
Expand Down Expand Up @@ -1589,15 +1596,14 @@ private boolean slowReadFlexSym_1_1(Marker markerToSet) {
if (nextByte < 0) {
return true;
}
if ((byte) nextByte == OpCodes.INLINE_SYMBOL_ZERO_LENGTH) {
// TODO: We could pretend $0 is a system symbol and consolidate some of the branches here. Is it worth it?
if ((byte) nextByte == FLEX_SYM_SYSTEM_SYMBOL_OFFSET) {
// Symbol zero.
markerToSet.endIndex = 0;
return false;
}
if ((byte) nextByte == OpCodes.STRING_ZERO_LENGTH) {
// Inline symbol with zero length.
markerToSet.startIndex = peekIndex;
markerToSet.endIndex = peekIndex;
if (nextByte > FLEX_SYM_SYSTEM_SYMBOL_OFFSET && nextByte <= FLEX_SYM_SYSTEM_SYMBOL_OFFSET + Byte.MAX_VALUE) {
setSystemSymbolMarker(markerToSet, nextByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET);
return false;
} else if ((byte) nextByte != OpCodes.DELIMITED_END_MARKER) {
throw new IonException("FlexSyms may only wrap symbol zero, empty string, or delimited end.");
Expand Down Expand Up @@ -1645,6 +1651,13 @@ IonTypeID typeIdFor(int length) {
return TYPE_IDS_1_1[OpCodes.SYMBOL_ADDRESS_MANY_BYTES & SINGLE_BYTE_MASK];
}
},
SYSTEM_SYMBOL_ID {
@Override
IonTypeID typeIdFor(int length) {
// if (length > 1) throw new IllegalStateException("System Symbols always have a length of 1");
return SYSTEM_SYMBOL_VALUE;
}
},
STRUCT_END {
@Override
IonTypeID typeIdFor(int length) {
Expand All @@ -1661,11 +1674,12 @@ static FlexSymType classifySpecialFlexSym(int specialByte) {
if (specialByte < 0) {
return FlexSymType.INCOMPLETE;
}
if ((byte) specialByte == OpCodes.INLINE_SYMBOL_ZERO_LENGTH) {
// TODO: We could pretend $0 is a system symbol and consolidate some of the branches here. Is it worth it?
if ((byte) specialByte == FLEX_SYM_SYSTEM_SYMBOL_OFFSET) {
return FlexSymType.SYMBOL_ID;
}
if ((byte) specialByte == OpCodes.STRING_ZERO_LENGTH) {
return FlexSymType.INLINE_TEXT;
if (specialByte > 0x60 && specialByte < 0xE0) {
return FlexSymType.SYSTEM_SYMBOL_ID;
}
if ((byte) specialByte == OpCodes.DELIMITED_END_MARKER) {
return FlexSymType.STRUCT_END;
Expand All @@ -1692,7 +1706,12 @@ private FlexSymType uncheckedSkipFlexSym_1_1(Marker markerToSet) {
if (result == 0) {
markerToSet.startIndex = peekIndex + 1;
markerToSet.endIndex = markerToSet.startIndex;
return FlexSymType.classifySpecialFlexSym(buffer[(int) peekIndex++] & SINGLE_BYTE_MASK);
int specialByte = buffer[(int) peekIndex++] & SINGLE_BYTE_MASK;
FlexSymType type = FlexSymType.classifySpecialFlexSym(specialByte);
if (type == FlexSymType.SYSTEM_SYMBOL_ID) {
setSystemSymbolMarker(markerToSet, (byte)(specialByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET));
}
return type;
} else if (result < 0) {
markerToSet.startIndex = peekIndex;
markerToSet.endIndex = peekIndex - result;
Expand Down Expand Up @@ -1720,11 +1739,19 @@ private FlexSymType slowSkipFlexSym_1_1(Marker markerToSet) {
result |= ~(-1 >>> Long.numberOfLeadingZeros(result));
}
if (result == 0) {
FlexSymType flexSymType = FlexSymType.classifySpecialFlexSym(slowReadByte());
int specialByte = slowReadByte();
FlexSymType flexSymType = FlexSymType.classifySpecialFlexSym(specialByte);
if (markerToSet != null && flexSymType != FlexSymType.INCOMPLETE) {
markerToSet.startIndex = peekIndex;
markerToSet.endIndex = peekIndex;
}
if (markerToSet != null && flexSymType == FlexSymType.SYSTEM_SYMBOL_ID) {
// FIXME: See if we can set the SID in the endIndex here without causing the slow reader to get confused
// about where the end of the value is for tagless symbols.
// I.e. use setSystemSymbolMarker(markerToSet, (byte)(specialByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET));
markerToSet.typeId = SYSTEM_SYMBOL_VALUE;
markerToSet.startIndex = peekIndex - 1;
}
return flexSymType;
} else if (result < 0) {
if (markerToSet != null) {
Expand Down Expand Up @@ -2229,26 +2256,28 @@ private void validateAnnotationWrapperEndIndex(long endIndex) {
}
}

/*
* The given Marker's endIndex is set to the system symbol ID value and its startIndex is set to -1
*/
private void setSystemSymbolMarker(Marker markerToSet, int systemSid) {
event = Event.START_SCALAR;
markerToSet.typeId = SYSTEM_SYMBOL_VALUE;
markerToSet.startIndex = -1;
markerToSet.endIndex = systemSid;
}

/**
* Sets the given marker to represent the current system token (system macro invocation or system symbol value).
* Before calling this method, `macroInvocationId` must be set from the one-byte FixedInt that represents the ID;
* positive values indicate a macro address, while negative values indicate a system symbol ID.
* @param valueTid the type ID of the system token.
* @param markerToSet the marker to set.
*/
private void setSystemTokenMarker(IonTypeID valueTid, Marker markerToSet) {
private void setSystemMacroInvocationMarker(Marker markerToSet) {
isSystemInvocation = true;
event = Event.NEEDS_INSTRUCTION;
markerToSet.typeId = SYSTEM_MACRO_INVOCATION_ID;
markerToSet.startIndex = peekIndex;
if (macroInvocationId < 0) {
// This is a system symbol value.
event = Event.START_SCALAR;
markerToSet.typeId = SYSTEM_SYMBOL_VALUE;
markerToSet.endIndex = peekIndex;
} else {
event = Event.NEEDS_INSTRUCTION;
markerToSet.typeId = valueTid;
markerToSet.endIndex = -1;
}
markerToSet.endIndex = -1;
}

/**
Expand Down Expand Up @@ -2293,9 +2322,9 @@ private void uncheckedReadMacroInvocationHeader(IonTypeID valueTid, Marker marke
setUserMacroInvocationMarker(valueTid, markerToSet, uncheckedReadFlexUInt_1_1());
return;
} else {
// Opcode 0xEF: system macro invocation or system symbol value.
// Opcode 0xEF: system macro invocation
macroInvocationId = buffer[(int) peekIndex++];
setSystemTokenMarker(valueTid, markerToSet);
setSystemMacroInvocationMarker(markerToSet);
return;
}
} else if (valueTid.length > 0) {
Expand Down Expand Up @@ -2461,7 +2490,7 @@ private boolean slowReadMacroInvocationHeader(IonTypeID valueTid, Marker markerT
}
// The downcast to byte then upcast to long results in sign extension, treating the byte as a FixedInt.
macroInvocationId = (byte) truncatedId;
setSystemTokenMarker(valueTid, markerToSet);
setSystemMacroInvocationMarker(markerToSet);
return false;
}
} else if (valueTid.length > 0) {
Expand Down Expand Up @@ -3055,7 +3084,9 @@ private Event slowNextValue() {
// This value was filled, but was skipped. Reset the fillDepth so that the reader does not think the
// next value was filled immediately upon encountering it.
refillableState.fillDepth = -1;
peekIndex = valueMarker.endIndex;
if (valueMarker.startIndex > -1) {
peekIndex = valueMarker.endIndex;
}
setCheckpointBeforeUnannotatedTypeId();
slowNextToken();
return event;
Expand Down Expand Up @@ -3095,7 +3126,9 @@ private long readFlexSymLengthAndType_1_1() {
flexSymType = uncheckedSkipFlexSym_1_1(valueMarker);
}
int lengthOfFlexSym = (int) (peekIndex - valueMarker.startIndex);
peekIndex = valueMarker.startIndex;
if (valueMarker.startIndex > -1) {
peekIndex = valueMarker.startIndex;
}
valueTid = flexSymType.typeIdFor(lengthOfFlexSym);
valueMarker.typeId = valueTid;
return lengthOfFlexSym;
Expand Down Expand Up @@ -3123,6 +3156,9 @@ private long calculateTaglessLengthAndType(TaglessEncoding taglessEncoding) {
default:
throw new IllegalStateException("Length is built into the primitive type's IonTypeID.");
}
if (valueTid == SYSTEM_SYMBOL_VALUE) {
return 1;
}
if (length >= 0) {
valueMarker.endIndex = peekIndex + length;
}
Expand Down
Loading

0 comments on commit cf69220

Please sign in to comment.