Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds support for reading binary Ion 1.1 nulls, booleans, ints, and floats. #641

Merged
merged 4 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions src/com/amazon/ion/impl/IonCursorBinary.java
Original file line number Diff line number Diff line change
Expand Up @@ -948,7 +948,10 @@ private long uncheckedReadFlexUInt_1_1() {
*/
private long slowReadFlexUInt_1_1() {
// TODO perf: try 1-byte special case checks. Least-significant bits of 1 indicate 1-byte
int currentByte = slowPeekByte();
int currentByte = slowReadByte();
if (currentByte < 0) {
return -1;
}
if (currentByte == 0) {
throw new IonException("Found a VarUInt that was too large to fit in a `long`");
}
Expand Down Expand Up @@ -1262,6 +1265,9 @@ private boolean uncheckedReadHeader(final int typeIdByte, final boolean isAnnota
if (endIndex > limit) {
isValueIncomplete = true;
}
if (minorVersion == 1 && valueTid.isNull && valueTid.length > 0) {
valueTid = IonTypeID.NULL_TYPE_IDS_1_1[buffer[(int)(peekIndex++) & SINGLE_BYTE_MASK]];
}
}
markerToSet.typeId = valueTid;
if (event == Event.START_CONTAINER) {
Expand Down Expand Up @@ -1306,6 +1312,13 @@ private boolean slowReadHeader(final int typeIdByte, final boolean isAnnotated,
}
return true;
}
if (minorVersion == 1 && valueTid.isNull && valueTid.length > 0) {
int nullTypeIndex = slowReadByte();
if (nullTypeIndex < 0) {
return true;
}
valueTid = IonTypeID.NULL_TYPE_IDS_1_1[nullTypeIndex];
}
markerToSet.typeId = valueTid;
if (checkpointLocation == CheckpointLocation.AFTER_SCALAR_HEADER) {
return true;
Expand Down Expand Up @@ -1335,11 +1348,6 @@ private boolean slowReadValueHeader(IonTypeID valueTid, boolean isAnnotated, Mar
if (valueTid.isDelimited) {
endIndex = DELIMITED_MARKER;
} else if (valueTid.variableLength) {
// At this point the value must be at least 2 more bytes: 1 for the smallest-possible value length
// and 1 for the smallest-possible value representation.
if (!fillAt(peekIndex, 2)) {
return true;
}
valueLength = minorVersion == 0 ? slowReadVarUInt_1_0() : slowReadFlexUInt_1_1();
if (valueLength < 0) {
return true;
Expand Down
228 changes: 153 additions & 75 deletions src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade
private static final int LOWER_SEVEN_BITS_BITMASK = 0x7F;

private static final int SINGLE_BYTE_MASK = 0xFF;
private static final int TWO_BYTE_MASK = 0xFFFF;

// Isolates the lowest six bits in a byte.
private static final int LOWER_SIX_BITS_BITMASK = 0x3F;
Expand Down Expand Up @@ -64,7 +65,8 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade
// The second-most significant bit in the most significant byte of a VarInt is the sign.
private static final int VAR_INT_SIGN_BITMASK = 0x40;

// 32-bit floats must declare length 4.
private static final int FLOAT_16_BYTE_LENGTH = 2;

private static final int FLOAT_32_BYTE_LENGTH = 4;

// Initial capacity of the ArrayList used to hold the symbol IDs of the annotations on the current value.
Expand Down Expand Up @@ -376,8 +378,8 @@ private Timestamp readTimestamp_1_0() {
offset = readVarInt_1_0(firstByte);
}
int year = readVarUInt_1_0();
int month = 0;
int day = 0;
int month = 1;
int day = 1;
int hour = 0;
int minute = 0;
int second = 0;
Expand Down Expand Up @@ -475,98 +477,120 @@ private int readVarSym_1_1(Marker marker) {
throw new UnsupportedOperationException();
}

private BigDecimal readBigDecimal_1_1() {
throw new UnsupportedOperationException();
/**
 * Decodes the FixedInt that occupies the bytes from `peekIndex` (inclusive) to `valueMarker.endIndex` (exclusive)
 * into a long. The encoding is little-endian two's complement, so the final byte is the most significant one and
 * carries the sign. When at least one byte is consumed, `peekIndex` is left at `valueMarker.endIndex`.
 * NOTE(review): no length check is performed here; a representation longer than 8 bytes would silently lose its
 * high-order bytes, so callers are presumably expected to bound the length first.
 * @return the decoded value, or 0 if there are no bytes between `peekIndex` and `valueMarker.endIndex`.
 */
private long readFixedInt_1_1() {
    final long end = valueMarker.endIndex;
    if (peekIndex >= end) {
        // Zero-length representation.
        return 0;
    }
    final long start = peekIndex;
    // Seed with the most significant byte. The implicit byte-to-long widening sign-extends it, which is why this
    // byte (and only this byte) is not masked with 0xFF.
    long result = buffer[(int) (end - 1)];
    // Fold in the remaining bytes from most significant to least significant.
    for (long index = end - 2; index >= start; index--) {
        result = (result << 8) | (buffer[(int) index] & SINGLE_BYTE_MASK);
    }
    peekIndex = end;
    return result;
}

private Decimal readDecimal_1_1() {
throw new UnsupportedOperationException();
/**
 * Copies the bytes of a FixedInt or FixedUInt from `startIndex` (inclusive) to `valueMarker.endIndex` (exclusive)
 * into scratch space in reverse order, converting the little-endian encoding into the big-endian two's complement
 * layout. The copy is right-aligned in the returned array, and index 0 is cleared first, so if `length` is one
 * larger than the number of encoded bytes the result has a leading zero byte (keeping an unsigned value with its
 * highest bit set from being read as negative). On return, `peekIndex` is set to `valueMarker.endIndex`.
 * @param startIndex the index of the first (least-significant) byte of the FixedInt or FixedUInt representation.
 * @param length the size requested from the scratch space; at least the number of encoded bytes.
 * @return a byte[] (obtained from `getScratchForSize`) containing the big-endian two's complement representation.
 */
private byte[] copyFixedIntOrFixedUIntAsTwosComplementBytes(long startIndex, int length) {
    byte[] bigEndian = getScratchForSize(length);
    // The leading byte is only overwritten when the encoded bytes fill the whole array; zero it so that a padded
    // array does not expose stale scratch contents as the most significant byte.
    bigEndian[0] = 0;
    int destination = bigEndian.length - 1;
    long source = startIndex;
    // Walk the source forward (least significant first) while filling the destination backward.
    while (source < valueMarker.endIndex) {
        bigEndian[destination--] = buffer[(int) source++];
    }
    peekIndex = valueMarker.endIndex;
    return bigEndian;
}

private long readLong_1_1() {
/**
 * Reads the FixedInt or FixedUInt that starts at `peekIndex` into a BigInteger.
 * @param length the length of the two's complement representation of the value. For FixedInts, this is always
 *               equal to the length of the value; for FixedUInts, this is one byte larger than the length of the
 *               value if the highest bit in the unsigned representation is set. A length that is not positive
 *               yields zero without reading any bytes.
 * @return the value.
 */
private BigInteger readFixedIntOrFixedUIntAsBigInteger_1_1(int length) {
    if (length <= 0) {
        // Nothing to decode: an empty representation is zero.
        return BigInteger.ZERO;
    }
    // BigInteger(byte[]) expects big-endian two's complement, so the little-endian bytes are reversed first.
    return new BigInteger(copyFixedIntOrFixedUIntAsTwosComplementBytes(peekIndex, length));
}

/**
 * Placeholder for reading an Ion 1.1 decimal as a BigDecimal. Not implemented: every call throws.
 * @return never returns normally.
 * @throws UnsupportedOperationException always.
 */
private BigDecimal readBigDecimal_1_1() {
throw new UnsupportedOperationException();
}

private BigInteger readBigInteger_1_1() {
private Decimal readDecimal_1_1() {
throw new UnsupportedOperationException();
}

/**
* Copies a FlexUInt into scratch space, converting it to its equivalent big-endian two's complement representation.
* @param firstByte the first (least-significant) byte in the FlexUInt representation.
* @param bitsToShiftRight the number of continuation bits that must be shifted out of every byte.
* @param startIndex the index of the second byte in the FlexUInt representation.
* @param length the number of bytes remaining in the FlexUInt representation.
* @return a byte[] (either new or reused) containing the big-endian two's complement representation of the value.
* Reads the FixedInt bounded by `valueMarker` into a `long`.
* @return the value.
*/
private byte[] copyFlexUIntAsTwosComplementBytes(int firstByte, int bitsToShiftRight, long startIndex, int length) {
// If the most significant bit is set, the value would be interpreted as a negative two's complement integer. To
// avoid that, make sure the most significant byte in the copy is 0 by over-allocating the destination buffer.
// Additionally, one more byte than 'length' is always required because 'length' does not include the first
// byte.
byte[] bytes = getScratchForSize(length + 1 + ((buffer[(int) startIndex + length - 1] < 0) ? 1 : 0));
bytes[0] = 0;
int copyIndex = bytes.length;
bytes[--copyIndex] = (byte) (firstByte >>> bitsToShiftRight);
int lowerBitsMask = ~(-1 << bitsToShiftRight);
for (int i = 0; i < length; i++) {
byte b = buffer[(int) startIndex + i];
// The following implements a byte-by-byte bit shift. The lower bits in each byte are or'd with the higher
// bits from the previous byte.
bytes[copyIndex] |= (byte) ((b & lowerBitsMask) << (8 - bitsToShiftRight));
bytes[--copyIndex] = (byte) ((b & SINGLE_BYTE_MASK) >>> bitsToShiftRight);
}
peekIndex = startIndex + length;
return bytes;
private long readLong_1_1() {
// Rewind to the first byte of the current value so that the FixedInt is decoded from its beginning regardless of
// where a previous read left peekIndex.
peekIndex = valueMarker.startIndex;
// readFixedInt_1_1 decodes from peekIndex up to valueMarker.endIndex.
return readFixedInt_1_1();
}

/**
* Reads an FlexUInt value into a BigInteger.
* @return the value as a BigInteger.
* Reads the FixedInt bounded by `valueMarker` into a BigInteger.
* @return the value.
*/
private BigInteger readFlexUIntAsBigInteger_1_1() {
BigInteger value;
int currentByte = buffer[(int) peekIndex++] & SINGLE_BYTE_MASK;
int length = 0;
while (currentByte == 0) {
// Each byte of continuation bits without a set bit adds 8 to the length of the FlexUInt, but since the
// length includes the continuation byte(s), each empty byte adds a net 7 to the total length.
length += 7;
currentByte = buffer[(int) peekIndex++] & SINGLE_BYTE_MASK;
}
int numberOfLengthBits = Integer.numberOfTrailingZeros(currentByte);
length += numberOfLengthBits;
if (length > 0) {
// NOTE: copying into scratch space is required because the encoded bytes, which are unsigned little-endian,
// need to be translated into two's complement big-endian bytes as required by this BigInteger constructor.
// This is expensive, but is cheaper than using arithmetic operations on a BigInteger directly, as this
// would require a new BigInteger to be allocated for each intermediate step.
value = new BigInteger(copyFlexUIntAsTwosComplementBytes(currentByte, numberOfLengthBits + 1, peekIndex, length));
} else {
value = BigInteger.ZERO;
}
return value;
private BigInteger readBigInteger_1_1() {
    // Decode from the first byte of the current value.
    peekIndex = valueMarker.startIndex;
    // The value is a signed FixedInt, so its two's complement length is exactly its encoded length (no padding).
    int encodedLength = (int) (valueMarker.endIndex - peekIndex);
    return readFixedIntOrFixedUIntAsBigInteger_1_1(encodedLength);
}

/**
* Reads the fraction component of an Ion 1.1 long form timestamp.
* @return the value as a BigDecimal.
*/
private BigDecimal readTimestampFraction_1_1() {
// The fractional seconds are encoded as a (coefficient, scale) pair,
// The fractional seconds are encoded as a (scale, coefficient) pair,
// which is similar to a decimal. The primary difference is that the scale represents a negative
// exponent because it is illegal for the fractional seconds value to be greater than or equal to 1.0
// or less than 0.0. The coefficient is encoded as a FlexUInt (instead of FlexInt) to prevent the
// encoding of fractional seconds less than 0.0. The scale is encoded as a FixedUInt (instead of FixedInt)
// or less than 0.0. The coefficient is encoded as a FixedUInt (instead of FixedInt) to prevent the
// encoding of fractional seconds less than 0.0. The scale is encoded as a FlexUInt (instead of FlexInt)
// to discourage the encoding of decimal numbers greater than 1.0.
BigDecimal value;
peekIndex = valueMarker.startIndex + L_TIMESTAMP_SECOND_BYTE_LENGTH;
if (buffer[(int) peekIndex] != 0) {
int scale = (int) readFlexUInt_1_1();
if (peekIndex >= valueMarker.endIndex) {
return BigDecimal.valueOf(0, scale);
}
int length = (int) (valueMarker.endIndex - peekIndex);
// Since the coefficient is stored in a FixedUInt, some 8-byte values cannot fit in a signed 8-byte long.
// Take the quick path for values up to 7 bytes rather than performing additional checks. This should cover
// almost all real-world timestamp fractions.
if (length <= 7) {
// No need to allocate a BigInteger to hold the coefficient.
value = BigDecimal.valueOf(readFlexUInt_1_1(), (int) readFixedUInt_1_1(peekIndex, valueMarker.endIndex));
value = BigDecimal.valueOf(readFixedUInt_1_1(peekIndex, valueMarker.endIndex), scale);
} else {
// The coefficient may overflow a long, so a BigInteger is required.
value = new BigDecimal(readFlexUIntAsBigInteger_1_1(), (int) readFixedUInt_1_1(peekIndex, valueMarker.endIndex));
// If the most-significant bit is set, pad the length by one byte so that the value remains unsigned.
length += (buffer[(int) valueMarker.endIndex - 1] < 0) ? 1 : 0;
value = new BigDecimal(readFixedIntOrFixedUIntAsBigInteger_1_1(length), scale);
}
if (BigDecimal.ONE.compareTo(value) < 1) {
throw new IllegalArgumentException(String.format("Fractional seconds %s must be greater than or equal to 0 and less than 1", value));
Expand All @@ -580,8 +604,8 @@ private BigDecimal readTimestampFraction_1_1() {
*/
private Timestamp readTimestampLongForm_1_1() {
int year;
int month = 0;
int day = 0;
int month = 1;
int day = 1;
int hour = 0;
int minute = 0;
int second = 0;
Expand Down Expand Up @@ -651,8 +675,8 @@ private Timestamp readTimestamp_1_1() {
}
Timestamp.Precision precision = S_TIMESTAMP_PRECISION_FOR_TYPE_ID_OFFSET[valueTid.lowerNibble];
int year = 0;
int month = 0;
int day = 0;
int month = 1;
int day = 1;
int hour = 0;
int minute = 0;
int second = 0;
Expand Down Expand Up @@ -750,8 +774,13 @@ private Timestamp readTimestamp_1_1() {
}
}

/**
* Reads the boolean value using the type ID of the current value.
* @return the value.
*/
private boolean readBoolean_1_1() {
throw new UnsupportedOperationException();
// Boolean 'true' is 0x5E; 'false' is 0x5F.
return valueTid.lowerNibble == 0xE;
}

@Override
Expand Down Expand Up @@ -823,15 +852,16 @@ public IntegerSize getIntegerSize() {
return null;
}
prepareScalar();
if (valueTid.length < 0) {
int length = valueTid.variableLength ? ((int) (valueMarker.endIndex - valueMarker.startIndex)) : valueTid.length;
if (length < 0) {
return IntegerSize.BIG_INTEGER;
} else if (valueTid.length < INT_SIZE_IN_BYTES) {
} else if (length < INT_SIZE_IN_BYTES) {
return IntegerSize.INT;
} else if (valueTid.length == INT_SIZE_IN_BYTES) {
} else if (length == INT_SIZE_IN_BYTES) {
return (minorVersion != 0 || classifyInteger_1_0()) ? IntegerSize.INT : IntegerSize.LONG;
} else if (valueTid.length < LONG_SIZE_IN_BYTES) {
} else if (length < LONG_SIZE_IN_BYTES) {
return IntegerSize.LONG;
} else if (valueTid.length == LONG_SIZE_IN_BYTES) {
} else if (length == LONG_SIZE_IN_BYTES) {
return (minorVersion != 0 || classifyInteger_1_0()) ? IntegerSize.LONG : IntegerSize.BIG_INTEGER;
}
return IntegerSize.BIG_INTEGER;
Expand Down Expand Up @@ -965,6 +995,49 @@ public int intValue() {
return (int) longValue();
}

// IEEE-754 half-precision (s=sign, e=exponent, f=fraction): seee_eeff_ffff_ffff
// Each mask isolates its field in place; the masked values are NOT shifted down to bit 0.
private static final int FLOAT_16_SIGN_MASK = 0b1000_0000_0000_0000;
private static final int FLOAT_16_EXPONENT_MASK = 0b0111_1100_0000_0000;
private static final int FLOAT_16_FRACTION_MASK = 0b0000_0011_1111_1111;

// float64 bias: 1023; float16 bias: 15. Shift left to align with the masked exponent bits.
// Because the difference is pre-shifted by the number of trailing zeros in the exponent mask, it can be added
// directly to an exponent that has been masked but not shifted.
private static final int FLOAT_16_TO_64_EXPONENT_BIAS_CONVERSION = (1023 - 15) << Integer.numberOfTrailingZeros(FLOAT_16_EXPONENT_MASK);
// The float16 sign bit has bit index 15; the float64 sign bit has bit index 63.
private static final int FLOAT_16_TO_64_SIGN_SHIFT = 63 - 15;
// The 5 float16 exponent bits start at index 10; the 11 float64 exponent bits start at index 52.
private static final int FLOAT_16_TO_64_EXPONENT_SHIFT = 52 - 10;
// The most significant float16 fraction bit is at index 9; the most significant float64 fraction bit is at index 51.
private static final int FLOAT_16_TO_64_FRACTION_SHIFT = 51 - 9;

/**
 * Reads the next two bytes from the given ByteBuffer as an IEEE-754 half-precision (16-bit) float, returning the
 * value as a Java double. Signed zeros, subnormals, normal values, and infinities are converted exactly (every
 * half-precision value is representable as a double). Any NaN input yields {@link Double#NaN}; the NaN's sign
 * and payload are not preserved.
 * @param byteBuffer a buffer positioned at the first byte of the 16-bit float. The two bytes are composed using
 *                   the buffer's current byte order, and the buffer's position advances by two.
 * @return the value.
 */
private static double readFloat16(ByteBuffer byteBuffer) {
    int bits = byteBuffer.getShort() & TWO_BYTE_MASK;
    // The fields are masked in place (not shifted down), matching the FLOAT_16_TO_64_* shift constants.
    int sign = bits & FLOAT_16_SIGN_MASK;
    int exponent = bits & FLOAT_16_EXPONENT_MASK;
    int fraction = bits & FLOAT_16_FRACTION_MASK;
    if (exponent == 0) {
        // Zero or subnormal: there is no implicit leading 1, so the magnitude is
        // 2^-14 * (fraction / 2^10) = fraction * 2^-24. The fraction has at most 10 significant bits, so this
        // product is exact in a double. A zero fraction yields 0.0, and negating it yields -0.0, so signed zeros
        // need no special case.
        double magnitude = fraction * 0x1.0p-24;
        return sign == 0 ? magnitude : -magnitude;
    }
    if (exponent == FLOAT_16_EXPONENT_MASK) {
        // All exponent bits set: infinity when the fraction is zero, NaN otherwise.
        if (fraction == 0) {
            return sign == 0 ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
        }
        return Double.NaN;
    }
    // Normal value: move each field to its float64 position, re-biasing the exponent on the way.
    return Double.longBitsToDouble(
        ((long) sign << FLOAT_16_TO_64_SIGN_SHIFT)
            | ((long) (exponent + FLOAT_16_TO_64_EXPONENT_BIAS_CONVERSION) << FLOAT_16_TO_64_EXPONENT_SHIFT)
            | ((long) fraction << FLOAT_16_TO_64_FRACTION_SHIFT)
    );
}

@Override
public double doubleValue() {
double value;
Expand All @@ -975,7 +1048,12 @@ public double doubleValue() {
return 0.0d;
}
ByteBuffer bytes = prepareByteBuffer(valueMarker.startIndex, valueMarker.endIndex);
if (length == FLOAT_32_BYTE_LENGTH) {
if (length == FLOAT_16_BYTE_LENGTH) {
if (minorVersion == 0) {
throw new IonException("Ion 1.0 floats may may only have length 0, 4, or 8.");
}
value = readFloat16(bytes);
} else if (length == FLOAT_32_BYTE_LENGTH) {
value = bytes.getFloat();
} else {
// Note: there is no need to check for other lengths here; the type ID byte is validated during next().
Expand Down
Loading
Loading