Skip to content

Commit

Permalink
Prepares to support reading binary Ion 1.1.
Browse files Browse the repository at this point in the history
  • Loading branch information
tgregg committed Nov 15, 2023
1 parent ab5d14a commit c510d1c
Show file tree
Hide file tree
Showing 6 changed files with 454 additions and 153 deletions.
73 changes: 67 additions & 6 deletions src/com/amazon/ion/impl/IonCursorBinary.java
Original file line number Diff line number Diff line change
Expand Up @@ -919,12 +919,50 @@ private boolean slowReadFieldName_1_0() {

/* ---- Ion 1.1 ---- */

private long uncheckedReadVarUInt_1_1() {
throw new UnsupportedOperationException();
/**
 * Reads a FlexUInt beginning at `peekIndex`. On normal return, `peekIndex` has been advanced past every byte
 * of the FlexUInt. NOTE: the FlexUInt must fit in a `long`. This must only be called when it is known that the
 * buffer already contains all the bytes in the FlexUInt; this method itself performs no availability checks.
 * @return the value.
 * @throws IonException if the first byte is zero, which means the encoding spans at least 9 bytes.
 */
private long uncheckedReadFlexUInt_1_1() {
    int currentByte = buffer[(int) peekIndex++] & 0xFF;
    if ((currentByte & 1) == 1) { // TODO perf: analyze whether the special case check is a net positive
        // Single-byte: the low bit is the length marker and the remaining 7 bits are the value.
        return currentByte >>> 1;
    }
    if (currentByte == 0) { // The first byte is 0, so there are at least 9 bytes.
        throw new IonException("Found a FlexUInt that was too large to fit in a `long`");
    }
    // TODO perf: try putting the rest in its own method
    // The number of trailing zero bits in the first byte, plus one, is the total number of encoded bytes.
    byte length = (byte) (Integer.numberOfTrailingZeros(currentByte) + 1);
    // Drop the `length` marker bits; what is left of the first byte is the low-order part of the value.
    long result = currentByte >>> length;
    for (byte i = 1; i < length; i++) {
        // Byte i contributes bits starting at position (8 * i - length), i.e. later bytes are more significant.
        result |= ((long) (buffer[(int) (peekIndex++)] & SINGLE_BYTE_MASK) << (8 * i - length));
    }
    return result;
}

private long slowReadVarUInt_1_1() {
throw new UnsupportedOperationException();
/**
 * Reads a FlexUInt, ensuring enough data is available in the buffer. NOTE: the FlexUInt must fit in a `long`.
 * The first byte is obtained from `slowPeekByte()` and every subsequent byte from `slowReadByte()`.
 * @return the value, or -1 if `slowReadByte()` returned a negative value before the whole FlexUInt was read
 *  (presumably signalling that more input is required -- confirm against `slowReadByte()`).
 * @throws IonException if the first byte is zero, which means the encoding spans at least 9 bytes.
 */
private long slowReadFlexUInt_1_1() {
    // TODO perf: try 1-byte special case checks. Least-significant bits of 1 indicate 1-byte
    int currentByte = slowPeekByte();
    if (currentByte == 0) {
        throw new IonException("Found a FlexUInt that was too large to fit in a `long`");
    }
    // The number of trailing zero bits in the first byte, plus one, is the total number of encoded bytes.
    byte length = (byte) (Integer.numberOfTrailingZeros(currentByte) + 1);
    // Drop the `length` marker bits; what is left of the first byte is the low-order part of the value.
    long result = currentByte >>> length;
    int numberOfBytesRead = 0;
    // The counter is incremented as part of the loop test, so inside the body it is the 1-based index of the
    // byte being read; that index determines the byte's bit position in the result.
    while (numberOfBytesRead++ < length - 1) {
        currentByte = slowReadByte();
        if (currentByte < 0) {
            return -1;
        }
        result |= ((long) currentByte << (8 * numberOfBytesRead - length));
    }
    return result;
}

private boolean uncheckedReadAnnotationWrapperHeader_1_1(IonTypeID valueTid) {
Expand All @@ -935,8 +973,29 @@ private boolean slowReadAnnotationWrapperHeader_1_1(IonTypeID valueTid) {
throw new UnsupportedOperationException();
}

/**
 * Calculates the end index for the given type ID and sets `event` based on the type of value encountered, if any.
 * At the time of invocation, `peekIndex` must point to the first byte after the value's type ID byte. After return,
 * `peekIndex` will point to the first byte in the value's representation, or, in the case of a NOP pad, the first
 * byte that follows the pad. For delimited containers no length is read and `DELIMITED_MARKER` is returned in
 * place of an end index.
 * @param valueTid the type ID of the value.
 * @param isAnnotated true if the value is annotated.
 * @return the end index of the value or NOP pad, or `DELIMITED_MARKER` for a delimited container.
 */
private long calculateEndIndex_1_1(IonTypeID valueTid, boolean isAnnotated) {
    if (valueTid.isDelimited) {
        event = Event.START_CONTAINER;
        return DELIMITED_MARKER;
    }
    // Operand order matters: when the length is variable, reading the FlexUInt advances `peekIndex` before it
    // is added, so the end index is measured from the first byte after the length.
    long endIndex = (valueTid.variableLength ? uncheckedReadFlexUInt_1_1() : valueTid.length) + peekIndex;
    if (valueTid.type != null && valueTid.type.ordinal() >= LIST_TYPE_ORDINAL) {
        event = Event.START_CONTAINER;
    } else if (valueTid.isNopPad) {
        uncheckedSeekPastNopPad(endIndex, isAnnotated);
    } else {
        event = Event.START_SCALAR;
    }
    return endIndex;
}

private void uncheckedReadFieldName_1_1() {
Expand Down Expand Up @@ -1098,6 +1157,8 @@ private void readIvm() {
}
if (minorVersion == 0) {
typeIds = IonTypeID.TYPE_IDS_1_0;
} else if (minorVersion == 1) {
typeIds = IonTypeID.TYPE_IDS_1_1;
} else {
throw new IonException(String.format("Unsupported Ion version: %d.%d", majorVersion, minorVersion));
}
Expand Down Expand Up @@ -1279,7 +1340,7 @@ private boolean slowReadValueHeader(IonTypeID valueTid, boolean isAnnotated, Mar
if (!fillAt(peekIndex, 2)) {
return true;
}
valueLength = minorVersion == 0 ? slowReadVarUInt_1_0() : slowReadVarUInt_1_1();
valueLength = minorVersion == 0 ? slowReadVarUInt_1_0() : slowReadFlexUInt_1_1();
if (valueLength < 0) {
return true;
}
Expand Down
217 changes: 209 additions & 8 deletions src/com/amazon/ion/impl/IonTypeID.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,40 @@ final class IonTypeID {
null // The 0xF type code is illegal in Ion 1.0.
};

// Ion 1.1 lookup table indexed by the upper nibble of a type ID byte. A null entry means the upper nibble alone
// does not determine an IonType; the IonTypeID constructor resolves those cases from the full byte.
private static final IonType[] BINARY_TOKEN_TYPES_1_1 = new IonType[] {
null, // 0: macro invocation
null, // 1: macro invocation
null, // 2: macro invocation
null, // 3: macro invocation
null, // 4: macro invocation
null, // 5: int, float, bool
IonType.DECIMAL, // 6
IonType.TIMESTAMP, // 7: short-form timestamps
IonType.STRING, // 8
IonType.SYMBOL, // 9: symbol values with inline text
IonType.LIST, // A
IonType.SEXP, // B
IonType.STRUCT, // C: symbol ID field names
IonType.STRUCT, // D: FlexSym field names
null, // E: symbol ID, annotated value, NOP, null, system macro invocation
null // F: variable length macro, variable length of all types, delimited start/end
};

// Singleton invalid type ID.
private static final IonTypeID ALWAYS_INVALID_TYPE_ID = new IonTypeID((byte) 0xFF, 0);

// Lookup tables with one precomputed IonTypeID per possible type ID byte, indexed by the byte's unsigned value.
// Every slot of TYPE_IDS_NO_IVM holds the shared always-invalid instance; the other two tables hold the IDs as
// interpreted under Ion 1.0 and Ion 1.1 respectively.
static final IonTypeID[] TYPE_IDS_NO_IVM = new IonTypeID[NUMBER_OF_BYTES];
static final IonTypeID[] TYPE_IDS_1_0 = new IonTypeID[NUMBER_OF_BYTES];
static final IonTypeID[] TYPE_IDS_1_1 = new IonTypeID[NUMBER_OF_BYTES];
static {
    int typeIdByte = 0x00;
    while (typeIdByte < NUMBER_OF_BYTES) {
        final byte id = (byte) typeIdByte;
        TYPE_IDS_NO_IVM[typeIdByte] = ALWAYS_INVALID_TYPE_ID;
        TYPE_IDS_1_0[typeIdByte] = new IonTypeID(id, 0);
        TYPE_IDS_1_1[typeIdByte] = new IonTypeID(id, 1);
        typeIdByte++;
    }
}

Expand All @@ -72,15 +94,15 @@ final class IonTypeID {
final byte lowerNibble;
final boolean isValid;
final boolean isNegativeInt;
final boolean isTemplateInvocation; // Unused in Ion 1.0
final int templateId; // Unused in Ion 1.0
final boolean isDelimited; // Unused in Ion 1.0
final boolean isMacroInvocation;
final int macroId;
final boolean isDelimited;
// For structs, denotes whether field names are VarSyms. For symbols, denotes whether the text is inline.
// For annotation wrappers, denotes whether tokens are VarSyms.
final boolean isInlineable; // Unused in Ion 1.0
final boolean isInlineable;

/**
* Determines whether the Ion spec allows this particular upperNibble/lowerNibble pair.
* Determines whether the Ion 1.0 spec allows this particular upperNibble/lowerNibble pair.
*/
private static boolean isValid_1_0(byte upperNibble, byte lowerNibble, IonType type) {
if (upperNibble == TYPE_CODE_INVALID) {
Expand Down Expand Up @@ -109,6 +131,20 @@ private static boolean isValid_1_0(byte upperNibble, byte lowerNibble, IonType t
return true;
}

/**
 * Determines whether the Ion 1.1 spec allows this particular type ID byte. Every byte is treated as valid
 * except the six listed below.
 * @param id the complete type ID byte.
 * @return false if the byte is one of the disallowed type IDs; otherwise, true.
 */
private static boolean isValid_1_1(byte id) {
    switch (id) {
        case (byte) 0x59:
        case (byte) 0xC1:
        case (byte) 0xD0:
        case (byte) 0xD1:
        case (byte) 0xE0:
        case (byte) 0xEE:
            return false;
        default:
            return true;
    }
}

private IonTypeID(byte id, int minorVersion) {
if (minorVersion == 0) {
byte upperNibble = (byte) ((id >> BITS_PER_NIBBLE) & LOW_NIBBLE_BITMASK);
Expand Down Expand Up @@ -136,12 +172,177 @@ private IonTypeID(byte id, int minorVersion) {
}
this.isNegativeInt = type == IonType.INT && upperNibble == NEGATIVE_INT_TYPE_CODE;
this.length = length;
this.isTemplateInvocation = false;
this.templateId = -1;
this.isMacroInvocation = false;
this.macroId = -1;
this.isDelimited = false;
this.isInlineable = false;
} else {
throw new IllegalStateException("Only Ion 1.0 is currently supported.");
isValid = isValid_1_1(id);
byte upperNibble = (byte) ((id >> BITS_PER_NIBBLE) & LOW_NIBBLE_BITMASK);
// For 0xF0 (delimited end byte) the entire byte is included. This avoids having to create a separate field
// just to identify this byte.
lowerNibble = (id == (byte) 0xF0) ? (byte) 0xF0 : (byte) (id & LOW_NIBBLE_BITMASK);
isNegativeInt = false; // Not applicable for Ion 1.1; sign is conveyed by the representation.
// 0xF4 is a length-prefixed macro invocation; 0xEF is a system macro invocation.
isMacroInvocation = upperNibble <= 0x4 || id == (byte) 0xF4 || id == (byte) 0xEF;
boolean isNopPad = false;
boolean isNull = false;
int length = -1;
if (isMacroInvocation) {
if (upperNibble == 0x4) {
variableLength = true;
// This isn't the whole macro ID, but it's all the relevant bits from the type ID byte (the 4
// least-significant bits).
macroId = lowerNibble;
} else if (upperNibble < 0x4){
variableLength = false;
macroId = id;
} else {
// System or length-prefixed macro invocation.
variableLength = upperNibble == 0xF;
macroId = -1;
}
type = null;
isInlineable = false;
} else {
macroId = -1;
variableLength =
(upperNibble == 0xF && lowerNibble >= 0x4) // Variable length, all types.
|| (upperNibble == 0x6 && lowerNibble == 0xF) // Decimal with negative-zero coefficient.
|| (upperNibble == 0xE && lowerNibble == 0x6) // Variable length annotation SIDs.
|| (upperNibble == 0xE && lowerNibble == 0x9) // Variable length annotation FlexSyms.
|| (upperNibble == 0xE && lowerNibble == 0xD); // Variable length NOP.
isInlineable =
// struct with VarSym field names.
(upperNibble == 0xD && lowerNibble >= 0x2)
// Delimited struct, variable-length symbol, variable-length struct with FlexSym field names.
|| (upperNibble == 0xF && (lowerNibble == 0x3 || lowerNibble == 0x9 || lowerNibble == 0xD))
// Annotation wrappers with VarSyms.
|| (upperNibble == 0xE && lowerNibble >= 0x7 && lowerNibble <= 9)
// Symbol values with inline text.
|| upperNibble == 0x9;
IonType typeFromUpperNibble = BINARY_TOKEN_TYPES_1_1[upperNibble];
if (typeFromUpperNibble == null) {
if (!isValid) {
type = null;
} else if (upperNibble == 0x5) {
if (lowerNibble <= 0x8) {
type = IonType.INT;
length = lowerNibble;
} else if (lowerNibble >= 0xE) {
type = IonType.BOOL;
length = 0;
} else {
type = IonType.FLOAT;
if (lowerNibble == 0xA) {
length = 0; // 0e0
} else if (lowerNibble == 0xB) {
length = 2;
} else if (lowerNibble == 0xC) {
length = 4;
} else if (lowerNibble == 0xD) {
length = 8;
}
}
} else if (upperNibble == 0xE) {
if (lowerNibble <= 0x3) {
type = IonType.SYMBOL;
length = lowerNibble == 0x3 ? -1 : lowerNibble;
} else if (lowerNibble <= 0x9) {
type = ION_TYPE_ANNOTATION_WRAPPER;
} else if (lowerNibble == 0xA) {
// null.null
type = IonType.NULL;
isNull = true;
length = 0;
} else if (lowerNibble == 0xB) {
// Typed null. Type byte follows.
type = null;
isNull = true;
} else if (lowerNibble <= 0xD) {
isNopPad = true;
type = null;
length = variableLength ? -1 : 0;
} else { // 0xF
// System macro invocation.
type = null;
}
} else { // 0xF
if (lowerNibble == 0) {
// Delimited end
type = null;
length = 0;
} else if (lowerNibble == 0x3 || lowerNibble == 0xC || lowerNibble == 0xD) {
type = IonType.STRUCT;
} else if (lowerNibble == 0x5) {
type = IonType.INT;
} else if (lowerNibble == 0x6) {
type = IonType.DECIMAL;
} else if (lowerNibble == 0x7) {
type = IonType.TIMESTAMP;
} else if (lowerNibble == 0x9) {
type = IonType.SYMBOL;
} else if (lowerNibble == 0x8) {
type = IonType.STRING;
} else if (lowerNibble == 0xE) {
type = IonType.BLOB;
} else if (lowerNibble == 0xF) {
type = IonType.CLOB;
} else if (lowerNibble == 0x1 || lowerNibble == 0xA) {
type = IonType.LIST;
} else if (lowerNibble == 0x2 || lowerNibble == 0xB) {
type = IonType.SEXP;
} else { // 0x4
// Variable length macro invocation
type = null;
}
}
} else {
type = typeFromUpperNibble;
if (type == IonType.TIMESTAMP) {
// Short-form timestamps. Long-form timestamps use the upper nibble 0xF, forcing them to take
// the previous branch.
switch (lowerNibble) {
case 0x0:
length = 1;
break;
case 0x1:
case 0x2:
length = 2;
break;
case 0x3:
length = 4;
break;
case 0x4:
case 0x8:
case 0x9:
length = 5;
break;
case 0x5:
length = 6;
break;
case 0x6:
case 0xA:
length = 7;
break;
case 0x7:
case 0xB:
length = 8;
break;
case 0xC:
length = 9;
break;
}
} else if (type != IonType.DECIMAL || lowerNibble != 0xF) {
// Negative-zero coefficient decimals are always variable-length.
length = lowerNibble;
}
}
}
isDelimited = upperNibble == 0xF && lowerNibble <= 0x3;
this.isNopPad = isNopPad;
this.isNull = isNull;
this.length = length;
}
}

Expand Down
10 changes: 9 additions & 1 deletion src/com/amazon/ion/impl/_Private_IonConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,15 @@ public static final boolean isSurrogate(int c) {
(byte) 0xEA };

/**
* The number of bytes in {@link #BINARY_VERSION_MARKER_1_0}.
* The byte sequence indicating use of Ion 1.1 binary format.
*/
public static final byte[] BINARY_VERSION_MARKER_1_1 = { (byte) 0xE0,
(byte) 0x01,
(byte) 0x01,
(byte) 0xEA };

/**
* The number of bytes in {@link #BINARY_VERSION_MARKER_1_0}
*/
public static final int BINARY_VERSION_MARKER_SIZE =
BINARY_VERSION_MARKER_1_0.length;
Expand Down
Loading

0 comments on commit c510d1c

Please sign in to comment.