From 90bcb51dc6a3adf45adaaa56f07ea5e06d99811d Mon Sep 17 00:00:00 2001 From: Tyler Gregg Date: Wed, 15 Nov 2023 14:28:46 -0800 Subject: [PATCH 1/5] Adds support for reading binary Ion 1.1 timestamps. --- src/com/amazon/ion/Timestamp.java | 13 + ...IonReaderContinuableApplicationBinary.java | 2 +- .../impl/IonReaderContinuableCoreBinary.java | 316 +++++++++++++++++- .../amazon/ion/impl/bin/IonEncoder_1_1.java | 4 +- .../ion/impl/bin/Ion_1_1_Constants.java | 123 ++++++- ...onReaderContinuableTopLevelBinaryTest.java | 181 +++++++++- 6 files changed, 603 insertions(+), 36 deletions(-) diff --git a/src/com/amazon/ion/Timestamp.java b/src/com/amazon/ion/Timestamp.java index 3ecb91baa3..19429189c7 100644 --- a/src/com/amazon/ion/Timestamp.java +++ b/src/com/amazon/ion/Timestamp.java @@ -676,6 +676,19 @@ else if (shouldCheckFraction) offset, APPLY_OFFSET_NO, CHECK_FRACTION_YES); } + /** + * @return a new Timestamp from the given components in local time, without validating the fractional seconds. + */ + @Deprecated + public static Timestamp _private_createFromLocalTimeFieldsUnchecked(Precision p, int year, int month, int day, + int hour, int minute, int second, + BigDecimal frac, Integer offset) + { + return new Timestamp(p, year, month, day, + hour, minute, second, frac, + offset, APPLY_OFFSET_YES, CHECK_FRACTION_NO); + } + /** * Creates a new Timestamp from a {@link Calendar}, preserving the * {@link Calendar}'s precision and local offset from UTC. diff --git a/src/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java b/src/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java index 9fe4e334b8..444c6fe950 100644 --- a/src/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java +++ b/src/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java @@ -902,7 +902,7 @@ private enum State { boolean startsWithIonSymbolTable() { long savedPeekIndex = peekIndex; peekIndex = annotationSequenceMarker.startIndex; - int sid = minorVersion == 0 ? 
readVarUInt_1_0() : readVarUInt_1_1(); + int sid = minorVersion == 0 ? readVarUInt_1_0() : (int) readFlexUInt_1_1(); peekIndex = savedPeekIndex; return ION_SYMBOL_TABLE_SID == sid; } diff --git a/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java b/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java index 811281e64c..1f06c46c76 100644 --- a/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java +++ b/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java @@ -19,6 +19,8 @@ import java.nio.ByteBuffer; import java.util.Date; +import static com.amazon.ion.impl.bin.Ion_1_1_Constants.*; + /** * An IonCursor capable of raw parsing of binary Ion streams. */ @@ -126,6 +128,22 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade new byte[12], }; + /** + * Returns a new or reused array of the requested size. + * @param requestedSize the size of the scratch space to retrieve. + * @return a byte array. + */ + private byte[] getScratchForSize(int requestedSize) { + byte[] bytes = null; + if (requestedSize < scratchForSize.length) { + bytes = scratchForSize[requestedSize]; + } + if (bytes == null) { + bytes = new byte[requestedSize]; + } + return bytes; + } + /** * Copy the requested number of bytes from the buffer into a scratch buffer of exactly the requested length. * @param startIndex the start index from which to copy. @@ -135,13 +153,7 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade private byte[] copyBytesToScratch(long startIndex, int length) { // Note: using reusable scratch buffers makes reading ints and decimals 1-5% faster and causes much less // GC churn. 
- byte[] bytes = null; - if (length < scratchForSize.length) { - bytes = scratchForSize[length]; - } - if (bytes == null) { - bytes = new byte[length]; - } + byte[] bytes = getScratchForSize(length); // The correct number of bytes will be requested from the buffer, so the limit is set at the capacity to // avoid having to calculate a limit. System.arraycopy(buffer, (int) startIndex, bytes, 0, bytes.length); @@ -444,8 +456,19 @@ private boolean classifyInteger_1_0() { return (buffer[(int) (valueMarker.startIndex)] & SINGLE_BYTE_MASK) <= MOST_SIGNIFICANT_BYTE_OF_MAX_INTEGER; } - int readVarUInt_1_1() { - throw new UnsupportedOperationException(); + /** + * Reads a FlexUInt into an int. After this method returns, `peekIndex` points to the first byte after the end of + * the FlexUInt. + * @return the value. + */ + long readFlexUInt_1_1() { + int currentByte = buffer[(int)(peekIndex++)] & SINGLE_BYTE_MASK; + byte length = (byte) (Integer.numberOfTrailingZeros(currentByte) + 1); + long result = currentByte >>> length; + for (byte i = 1; i < length; i++) { + result |= ((long) (buffer[(int) (peekIndex++)] & SINGLE_BYTE_MASK) << (8 * i - length)); + } + return result; } private int readVarSym_1_1(Marker marker) { @@ -468,8 +491,263 @@ private BigInteger readBigInteger_1_1() { throw new UnsupportedOperationException(); } + /** + * Copies a FlexUInt into scratch space, converting it to its equivalent big-endian two's complement representation. + * @param firstByte the first (least-significant) byte in the FlexUInt representation. + * @param bitsToShiftRight the number of continuation bits that must be shifted out of every byte. + * @param startIndex the index of the second byte in the FlexUInt representation. + * @param length the number of bytes remaining in the FlexUInt representation. + * @return a byte[] (either new or reused) containing the big-endian two's complement representation of the value. 
+ */ + private byte[] copyFlexUIntAsTwosComplementBytes(int firstByte, int bitsToShiftRight, long startIndex, int length) { + // If the most significant bit is set, the value would be interpreted as a negative two's complement integer. To + // avoid that, make sure the most significant byte in the copy is 0 by over-allocating the destination buffer. + // Additionally, one more byte than 'length' is always required because 'length' does not include the first + // byte. + byte[] bytes = getScratchForSize(length + 1 + ((buffer[(int) startIndex + length - 1] < 0) ? 1 : 0)); + bytes[0] = 0; + int copyIndex = bytes.length; + bytes[--copyIndex] = (byte) (firstByte >>> bitsToShiftRight); + int lowerBitsMask = ~(-1 << bitsToShiftRight); + for (int i = 0; i < length; i++) { + byte b = buffer[(int) startIndex + i]; + // The following implements a byte-by-byte bit shift. The lower bits in each byte are or'd with the higher + // bits from the previous byte. + bytes[copyIndex] |= (byte) ((b & lowerBitsMask) << (8 - bitsToShiftRight)); + bytes[--copyIndex] = (byte) ((b & SINGLE_BYTE_MASK) >>> bitsToShiftRight); + } + peekIndex = startIndex + length; + return bytes; + } + + /** + * Reads an FlexUInt value into a BigInteger. + * @return the value as a BigInteger. + */ + private BigInteger readFlexUIntAsBigInteger_1_1() { + BigInteger value; + int currentByte = buffer[(int) peekIndex++] & SINGLE_BYTE_MASK; + int length = 0; + while (currentByte == 0) { + // Each byte of continuation bits without a set bit adds 8 to the length of the FlexUInt, but since the + // length includes the continuation byte(s), each empty byte adds a net 7 to the total length. 
+ length += 7; + currentByte = buffer[(int) peekIndex++] & SINGLE_BYTE_MASK; + } + int numberOfLengthBits = Integer.numberOfTrailingZeros(currentByte); + length += numberOfLengthBits; + if (length > 0) { + // NOTE: copying into scratch space is required because the encoded bytes, which are unsigned little-endian, + // need to be translated into two's complement big-endian bytes as required by this BigInteger constructor. + // This is expensive, but is cheaper than using arithmetic operations on a BigInteger directly, as this + // would require a new BigInteger to be allocated for each intermediate step. + value = new BigInteger(copyFlexUIntAsTwosComplementBytes(currentByte, numberOfLengthBits + 1, peekIndex, length)); + } else { + value = BigInteger.ZERO; + } + return value; + } + + /** + * Reads the fraction component of an Ion 1.1 long form timestamp. + * @return the value as a BigDecimal. + */ + private BigDecimal readTimestampFraction_1_1() { + // The fractional seconds are encoded as a (coefficient, scale) pair, + // which is similar to a decimal. The primary difference is that the scale represents a negative + // exponent because it is illegal for the fractional seconds value to be greater than or equal to 1.0 + // or less than 0.0. The coefficient is encoded as a FlexUInt (instead of FlexInt) to prevent the + // encoding of fractional seconds less than 0.0. The scale is encoded as a FixedUInt (instead of FixedInt) + // to discourage the encoding of decimal numbers greater than 1.0. + BigDecimal value; + peekIndex = valueMarker.startIndex + L_TIMESTAMP_SECOND_BYTE_LENGTH; + if (buffer[(int) peekIndex] != 0) { + // No need to allocate a BigInteger to hold the coefficient. + value = BigDecimal.valueOf(readFlexUInt_1_1(), (int) readFixedUInt_1_1(peekIndex, valueMarker.endIndex)); + } else { + // The coefficient may overflow a long, so a BigInteger is required. 
+ value = new BigDecimal(readFlexUIntAsBigInteger_1_1(), (int) readFixedUInt_1_1(peekIndex, valueMarker.endIndex)); + } + if (BigDecimal.ONE.compareTo(value) < 1) { + throw new IllegalArgumentException(String.format("Fractional seconds %s must be greater than or equal to 0 and less than 1", value)); + } + return value; + } + + /** + * Reads an Ion 1.1 long form timestamp. + * @return the value. + */ + private Timestamp readTimestampLongForm_1_1() { + int year; + int month = 0; + int day = 0; + int hour = 0; + int minute = 0; + int second = 0; + BigDecimal fractionalSecond = null; + boolean isOffsetUnknown = true; + int offset = 0; + int length = (int) (valueMarker.endIndex - valueMarker.startIndex); + if (length > L_TIMESTAMP_SECOND_BYTE_LENGTH) { + // Fractional component. + fractionalSecond = readTimestampFraction_1_1(); + length = L_TIMESTAMP_SECOND_BYTE_LENGTH; + } + Timestamp.Precision precision = L_TIMESTAMP_PRECISION_FOR_LENGTH[length]; + long bits = 0; + for (int i = length - 1; i >= 0 ; i--) { + bits = (bits << 8) | (buffer[i + (int) valueMarker.startIndex] & SINGLE_BYTE_MASK); + } + switch (length) { + case L_TIMESTAMP_SECOND_BYTE_LENGTH: + second = (int) ((bits & L_TIMESTAMP_SECOND_MASK) >>> L_TIMESTAMP_SECOND_BIT_OFFSET); + case L_TIMESTAMP_MINUTE_BYTE_LENGTH: + offset = (int) ((bits & L_TIMESTAMP_OFFSET_MASK) >>> L_TIMESTAMP_OFFSET_BIT_OFFSET); + if ((offset ^ TWELVE_BIT_MASK) != 0) { + isOffsetUnknown = false; + offset -= L_TIMESTAMP_OFFSET_BIAS; + } + minute = (int) ((bits & L_TIMESTAMP_MINUTE_MASK) >>> L_TIMESTAMP_MINUTE_BIT_OFFSET); + hour = (int) (bits & L_TIMESTAMP_HOUR_MASK) >>> L_TIMESTAMP_HOUR_BIT_OFFSET; + case L_TIMESTAMP_DAY_OR_MONTH_BYTE_LENGTH: + day = (int) (bits & L_TIMESTAMP_DAY_MASK) >>> L_TIMESTAMP_DAY_BIT_OFFSET; + if (length == L_TIMESTAMP_DAY_OR_MONTH_BYTE_LENGTH) { + // Month and Day precision share the same length. If the day subfield is 0, the timestamp has + // month precision. Otherwise, it has day precision. 
+ precision = day == 0 ? Timestamp.Precision.MONTH : Timestamp.Precision.DAY; + } + month = (int) (bits & L_TIMESTAMP_MONTH_MASK) >>> L_TIMESTAMP_MONTH_BIT_OFFSET; + case L_TIMESTAMP_YEAR_BYTE_LENGTH: + year = (int) (bits & L_TIMESTAMP_YEAR_MASK); + break; + default: + throw new IonException("Illegal timestamp encoding."); + } + try { + return Timestamp._private_createFromLocalTimeFieldsUnchecked( + precision, + year, + month, + day, + hour, + minute, + second, + fractionalSecond, + isOffsetUnknown ? null : offset + ); + } catch (IllegalArgumentException e) { + throw new IonException("Illegal timestamp encoding. ", e); + } + } + + /** + * Reads an Ion 1.1 timestamp in either the long or short form. + * @return the value. + */ private Timestamp readTimestamp_1_1() { - throw new UnsupportedOperationException(); + if (valueTid.variableLength) { + return readTimestampLongForm_1_1(); + } + Timestamp.Precision precision = S_TIMESTAMP_PRECISION_FOR_TYPE_ID_OFFSET[valueTid.lowerNibble]; + int year = 0; + int month = 0; + int day = 0; + int hour = 0; + int minute = 0; + int second = 0; + BigDecimal fractionalSecond = null; + Integer offset = null; + long bits = 0; + for (int i = (int) Math.min(valueMarker.endIndex, valueMarker.startIndex + 8) - 1; i >= valueMarker.startIndex ; i--) { + bits = (bits << 8) | (buffer[i] & SINGLE_BYTE_MASK); + } + switch (precision) { + case FRACTION: + case SECOND: + int unscaledValue = -1; + int scale = -1; + int bound = -1; + switch (valueTid.lowerNibble) { + case S_O_TIMESTAMP_NANOSECOND_LOWER_NIBBLE: + // The least-significant 24 bits of the nanoseconds field are contained in the long. + unscaledValue = (int) ((bits & S_O_TIMESTAMP_NANOSECOND_EIGHTH_BYTE_MASK) >>> S_O_TIMESTAMP_FRACTION_BIT_OFFSET); + // The most-significant 6 bits of the nanoseconds field are contained in the ninth byte. 
+ unscaledValue |= (int) ((buffer[(int) valueMarker.endIndex - 1] & S_O_TIMESTAMP_NANOSECOND_NINTH_BYTE_MASK)) << S_O_TIMESTAMP_NANOSECOND_BITS_IN_EIGHTH_BYTE; + bound = MAX_NANOSECONDS; + scale = NANOSECOND_SCALE; + break; + case S_U_TIMESTAMP_NANOSECOND_LOWER_NIBBLE: + unscaledValue = (int) ((bits & S_U_TIMESTAMP_NANOSECOND_MASK) >>> S_U_TIMESTAMP_FRACTION_BIT_OFFSET); + bound = MAX_NANOSECONDS; + scale = NANOSECOND_SCALE; + break; + case S_O_TIMESTAMP_MICROSECOND_LOWER_NIBBLE: + unscaledValue = (int) ((bits & S_O_TIMESTAMP_MICROSECOND_MASK) >>> S_O_TIMESTAMP_FRACTION_BIT_OFFSET); + bound = MAX_MICROSECONDS; + scale = MICROSECOND_SCALE; + break; + case S_U_TIMESTAMP_MICROSECOND_LOWER_NIBBLE: + unscaledValue = (int) ((bits & S_U_TIMESTAMP_MICROSECOND_MASK) >>> S_U_TIMESTAMP_FRACTION_BIT_OFFSET); + bound = MAX_MICROSECONDS; + scale = MICROSECOND_SCALE; + break; + case S_O_TIMESTAMP_MILLISECOND_LOWER_NIBBLE: + unscaledValue = (int) ((bits & S_O_TIMESTAMP_MILLISECOND_MASK) >>> S_O_TIMESTAMP_FRACTION_BIT_OFFSET); + bound = MAX_MILLISECONDS; + scale = MILLISECOND_SCALE; + break; + case S_U_TIMESTAMP_MILLISECOND_LOWER_NIBBLE: + unscaledValue = (int) ((bits & S_U_TIMESTAMP_MILLISECOND_MASK) >>> S_U_TIMESTAMP_FRACTION_BIT_OFFSET); + bound = MAX_MILLISECONDS; + scale = MILLISECOND_SCALE; + break; + default: + // Second. 
+ break; + } + if (unscaledValue >= 0) { + if (unscaledValue > bound) { + throw new IonException("Timestamp fraction must be between 0 and 1."); + } + fractionalSecond = BigDecimal.valueOf(unscaledValue, scale); + } + if (valueTid.lowerNibble >= S_O_TIMESTAMP_MINUTE_LOWER_NIBBLE) { + second = (int) ((bits & S_O_TIMESTAMP_SECOND_MASK) >>> S_O_TIMESTAMP_SECOND_BIT_OFFSET); + } else { + second = (int) ((bits & S_U_TIMESTAMP_SECOND_MASK) >>> S_U_TIMESTAMP_SECOND_BIT_OFFSET); + } + case MINUTE: + if (valueTid.lowerNibble >= S_O_TIMESTAMP_MINUTE_LOWER_NIBBLE) { + offset = (int) (((bits & S_O_TIMESTAMP_OFFSET_MASK) >>> S_O_TIMESTAMP_OFFSET_BIT_OFFSET) - S_O_TIMESTAMP_OFFSET_BIAS) * S_O_TIMESTAMP_OFFSET_INCREMENT; + } else { + offset = (bits & S_U_TIMESTAMP_UTC_FLAG) == 0 ? null : 0; + } + minute = (int) (bits & S_TIMESTAMP_MINUTE_MASK) >>> S_TIMESTAMP_MINUTE_BIT_OFFSET; + hour = (int) (bits & S_TIMESTAMP_HOUR_MASK) >>> S_TIMESTAMP_HOUR_BIT_OFFSET; + case DAY: + day = (int) (bits & S_TIMESTAMP_DAY_MASK) >>> S_TIMESTAMP_DAY_BIT_OFFSET; + case MONTH: + month = (int) (bits & S_TIMESTAMP_MONTH_MASK) >>> S_TIMESTAMP_MONTH_BIT_OFFSET; + case YEAR: + // Year is encoded as the number of years since 1970. + year = S_TIMESTAMP_YEAR_BIAS + (int) (bits & S_TIMESTAMP_YEAR_MASK); + } + try { + return Timestamp._private_createFromLocalTimeFieldsUnchecked( + precision, + year, + month, + day, + hour, + minute, + second, + fractionalSecond, + offset + ); + } catch (IllegalArgumentException e) { + throw new IonException("Illegal timestamp encoding. ", e); + } } private boolean readBoolean_1_1() { @@ -499,7 +777,7 @@ ByteBuffer prepareByteBuffer(long startIndex, long endIndex) { /** - * Reads a UInt. + * Reads a UInt (big-endian). * @param startIndex the index of the first byte in the UInt value. * @param endIndex the index of the first byte after the end of the UInt value. * @return the value. 
@@ -512,6 +790,20 @@ private long readUInt(long startIndex, long endIndex) { return result; } + /** + * Reads a FixedUInt (little-endian). + * @param startIndex the index of the first byte in the FixedUInt value. + * @param endIndex the index of the first byte after the end of the FixedUInt value. + * @return the value. + */ + private long readFixedUInt_1_1(long startIndex, long endIndex) { + long result = 0; + for (int i = (int) startIndex; i < endIndex; i++) { + result |= ((long) (buffer[i] & SINGLE_BYTE_MASK) << ((i - startIndex) * VALUE_BITS_PER_UINT_BYTE)); + } + return result; + } + @Override public boolean isNullValue() { return valueTid != null && valueTid.isNull; @@ -771,7 +1063,7 @@ IntList getAnnotationSidList() { } } else { while (peekIndex < annotationSequenceMarker.endIndex) { - annotationSids.add(readVarUInt_1_1()); + annotationSids.add((int) readFlexUInt_1_1()); } } peekIndex = savedPeekIndex; diff --git a/src/com/amazon/ion/impl/bin/IonEncoder_1_1.java b/src/com/amazon/ion/impl/bin/IonEncoder_1_1.java index 7b1ca9d4b2..d683480fd0 100644 --- a/src/com/amazon/ion/impl/bin/IonEncoder_1_1.java +++ b/src/com/amazon/ion/impl/bin/IonEncoder_1_1.java @@ -1,7 +1,6 @@ package com.amazon.ion.impl.bin; import com.amazon.ion.Decimal; -import com.amazon.ion.IonText; import com.amazon.ion.IonType; import com.amazon.ion.Timestamp; import com.amazon.ion.impl.bin.utf8.Utf8StringEncoder; @@ -9,7 +8,6 @@ import java.math.BigDecimal; import java.math.BigInteger; -import java.nio.charset.StandardCharsets; import static com.amazon.ion.impl.bin.Ion_1_1_Constants.*; import static java.lang.Double.doubleToRawLongBits; @@ -313,7 +311,7 @@ private static int writeShortFormTimestampValue(WriteBuffer buffer, Timestamp va } } else { long localOffset = (value.getLocalOffset().longValue() / 15) + (14 * 4); - bits |= (localOffset & LEAST_SIGNIFICANT_7_BITS) << S_O_TIMESTAMP_OFFSET_BIT_OFFSET; + bits |= (localOffset & SEVEN_BIT_MASK) << S_O_TIMESTAMP_OFFSET_BIT_OFFSET; if 
(value.getPrecision() == Timestamp.Precision.MINUTE) { buffer.writeByte(OpCodes.TIMESTAMP_MINUTE_PRECISION_WITH_OFFSET); diff --git a/src/com/amazon/ion/impl/bin/Ion_1_1_Constants.java b/src/com/amazon/ion/impl/bin/Ion_1_1_Constants.java index 8df501b349..bbac4ef5ed 100644 --- a/src/com/amazon/ion/impl/bin/Ion_1_1_Constants.java +++ b/src/com/amazon/ion/impl/bin/Ion_1_1_Constants.java @@ -1,5 +1,7 @@ package com.amazon.ion.impl.bin; +import com.amazon.ion.Timestamp; + /** * Contains constants (other than OpCodes) which are generally applicable to both reading and writing binary Ion 1.1 */ @@ -8,32 +10,119 @@ private Ion_1_1_Constants() {} static final int FIRST_2_BYTE_SYMBOL_ADDRESS = 256; static final int FIRST_MANY_BYTE_SYMBOL_ADDRESS = 65792; + public static final int MAX_NANOSECONDS = 999999999; + public static final int NANOSECOND_SCALE = 9; + public static final int MAX_MICROSECONDS = 999999; + public static final int MICROSECOND_SCALE = 6; + public static final short MAX_MILLISECONDS = 999; + public static final int MILLISECOND_SCALE = 3; //////// Timestamp Field Constants //////// // S_TIMESTAMP_* is applicable to all short-form timestamps - static final int S_TIMESTAMP_MONTH_BIT_OFFSET = 7; - static final int S_TIMESTAMP_DAY_BIT_OFFSET = 11; - static final int S_TIMESTAMP_HOUR_BIT_OFFSET = 16; - static final int S_TIMESTAMP_MINUTE_BIT_OFFSET = 21; + public static final int S_TIMESTAMP_YEAR_BIAS = 1970; + public static final int S_TIMESTAMP_MONTH_BIT_OFFSET = 7; + public static final int S_TIMESTAMP_DAY_BIT_OFFSET = 11; + public static final int S_TIMESTAMP_HOUR_BIT_OFFSET = 16; + public static final int S_TIMESTAMP_MINUTE_BIT_OFFSET = 21; // S_U_TIMESTAMP_* is applicable to all short-form timestamps with a `U` bit - static final int S_U_TIMESTAMP_UTC_FLAG = 1 << 27; - static final int S_U_TIMESTAMP_SECOND_BIT_OFFSET = 28; - static final int S_U_TIMESTAMP_FRACTION_BIT_OFFSET = 34; + public static final int S_U_TIMESTAMP_NANOSECOND_LOWER_NIBBLE = 0x7; + 
public static final int S_U_TIMESTAMP_MICROSECOND_LOWER_NIBBLE = 0x6; + public static final int S_U_TIMESTAMP_MILLISECOND_LOWER_NIBBLE = 0x5; + public static final int S_U_TIMESTAMP_UTC_FLAG = 1 << 27; + public static final int S_U_TIMESTAMP_SECOND_BIT_OFFSET = 28; + public static final int S_U_TIMESTAMP_FRACTION_BIT_OFFSET = 34; // S_O_TIMESTAMP_* is applicable to all short-form timestamps with `o` (offset) bits - static final int S_O_TIMESTAMP_OFFSET_BIT_OFFSET = 27; - static final int S_O_TIMESTAMP_SECOND_BIT_OFFSET = 34; + public static final int S_O_TIMESTAMP_NANOSECOND_LOWER_NIBBLE = 0xC; + public static final int S_O_TIMESTAMP_MICROSECOND_LOWER_NIBBLE = 0xB; + public static final int S_O_TIMESTAMP_MILLISECOND_LOWER_NIBBLE = 0xA; + public static final int S_O_TIMESTAMP_MINUTE_LOWER_NIBBLE = 0x8; + public static final int S_O_TIMESTAMP_OFFSET_BIT_OFFSET = 27; + public static final int S_O_TIMESTAMP_SECOND_BIT_OFFSET = 34; + public static final int S_O_TIMESTAMP_FRACTION_BIT_OFFSET = 40; + public static final int S_O_TIMESTAMP_NANOSECOND_BITS_IN_EIGHTH_BYTE = 24; + + // Explicit offsets are encoded in increments of 15 minutes, from -56. 
+ public static final int S_O_TIMESTAMP_OFFSET_BIAS = 56; + public static final int S_O_TIMESTAMP_OFFSET_INCREMENT = 15; // L_TIMESTAMP_* is applicable to all long-form timestamps - static final int L_TIMESTAMP_MONTH_BIT_OFFSET = 14; - static final int L_TIMESTAMP_DAY_BIT_OFFSET = 18; - static final int L_TIMESTAMP_HOUR_BIT_OFFSET = 23; - static final int L_TIMESTAMP_MINUTE_BIT_OFFSET = 28; - static final int L_TIMESTAMP_OFFSET_BIT_OFFSET = 34; - static final int L_TIMESTAMP_SECOND_BIT_OFFSET = 46; - static final int L_TIMESTAMP_UNKNOWN_OFFSET_VALUE = 0b111111111111; + public static final int L_TIMESTAMP_MONTH_BIT_OFFSET = 14; + public static final int L_TIMESTAMP_DAY_BIT_OFFSET = 18; + public static final int L_TIMESTAMP_HOUR_BIT_OFFSET = 23; + public static final int L_TIMESTAMP_MINUTE_BIT_OFFSET = 28; + public static final int L_TIMESTAMP_OFFSET_BIT_OFFSET = 34; + public static final int L_TIMESTAMP_SECOND_BIT_OFFSET = 46; + public static final int L_TIMESTAMP_UNKNOWN_OFFSET_VALUE = 0b111111111111; + public static final int L_TIMESTAMP_SECOND_BYTE_LENGTH = 7; + public static final int L_TIMESTAMP_MINUTE_BYTE_LENGTH = 6; + public static final int L_TIMESTAMP_DAY_OR_MONTH_BYTE_LENGTH = 3; + public static final int L_TIMESTAMP_YEAR_BYTE_LENGTH = 2; + public static final int L_TIMESTAMP_OFFSET_BIAS = 1440; // 24 hours * 60 min/hour + + //////// Lookup tables //////// + + public static final Timestamp.Precision[] S_TIMESTAMP_PRECISION_FOR_TYPE_ID_OFFSET = new Timestamp.Precision[] { + Timestamp.Precision.YEAR, // 0x70 + Timestamp.Precision.MONTH, // 0x71 + Timestamp.Precision.DAY, // 0x72 + Timestamp.Precision.MINUTE, // 0x73 (minute UTC) + Timestamp.Precision.SECOND, // 0x74 (second UTC) + Timestamp.Precision.SECOND, // 0x75 (millisecond UTC) + Timestamp.Precision.SECOND, // 0x76 (microsecond UTC) + Timestamp.Precision.SECOND, // 0x77 (nanosecond UTC) + Timestamp.Precision.MINUTE, // 0x78 (minute offset) + Timestamp.Precision.SECOND, // 0x79 (second offset) + 
Timestamp.Precision.SECOND, // 0x7A (millisecond offset) + Timestamp.Precision.SECOND, // 0x7B (microsecond offset) + Timestamp.Precision.SECOND, // 0x7C (nanosecond offset) + }; + + public static final Timestamp.Precision[] L_TIMESTAMP_PRECISION_FOR_LENGTH = new Timestamp.Precision[] { + null, // Length 0: illegal + null, // Length 1: illegal + Timestamp.Precision.YEAR, + null, // Length 3: Month or Day; additional examination required. + null, // Length 4: illegal + null, // Length 5: illegal + Timestamp.Precision.MINUTE, + Timestamp.Precision.SECOND + }; //////// Bit masks //////// - static final long LEAST_SIGNIFICANT_7_BITS = 0b01111111L; + public static final int FOUR_BIT_MASK = 0xF; + public static final int FIVE_BIT_MASK = 0x1F; + public static final int SIX_BIT_MASK = 0x3F; + public static final int SEVEN_BIT_MASK = 0x7F; + public static final int TEN_BIT_MASK = 0x3FF; + public static final int TWELVE_BIT_MASK = 0xFFF; + public static final int FOURTEEN_BIT_MASK = 0x3FFF; + public static final int TWENTY_BIT_MASK = 0xFFFFF; + public static final int TWENTY_FOUR_BIT_MASK = 0xFFFFFF; + public static final int THIRTY_BIT_MASK = 0x3FFFFFFF; + + public static final long L_TIMESTAMP_SECOND_MASK = (long) SIX_BIT_MASK << L_TIMESTAMP_SECOND_BIT_OFFSET; + public static final long L_TIMESTAMP_OFFSET_MASK = (long) TWELVE_BIT_MASK << L_TIMESTAMP_OFFSET_BIT_OFFSET; + public static final long L_TIMESTAMP_MINUTE_MASK = (long) SIX_BIT_MASK << L_TIMESTAMP_MINUTE_BIT_OFFSET; + public static final int L_TIMESTAMP_HOUR_MASK = FIVE_BIT_MASK << L_TIMESTAMP_HOUR_BIT_OFFSET; + public static final int L_TIMESTAMP_DAY_MASK = FIVE_BIT_MASK << L_TIMESTAMP_DAY_BIT_OFFSET; + public static final int L_TIMESTAMP_MONTH_MASK = FOUR_BIT_MASK << L_TIMESTAMP_MONTH_BIT_OFFSET; + public static final int L_TIMESTAMP_YEAR_MASK = FOURTEEN_BIT_MASK; + + public static final long S_O_TIMESTAMP_NANOSECOND_EIGHTH_BYTE_MASK = (long) TWENTY_FOUR_BIT_MASK << S_O_TIMESTAMP_FRACTION_BIT_OFFSET; + public 
static final long S_O_TIMESTAMP_NANOSECOND_NINTH_BYTE_MASK = SIX_BIT_MASK; + public static final long S_U_TIMESTAMP_NANOSECOND_MASK = (long) THIRTY_BIT_MASK << S_U_TIMESTAMP_FRACTION_BIT_OFFSET; + public static final long S_O_TIMESTAMP_MICROSECOND_MASK = (long) TWENTY_BIT_MASK << S_O_TIMESTAMP_FRACTION_BIT_OFFSET; + public static final long S_U_TIMESTAMP_MICROSECOND_MASK = (long) TWENTY_BIT_MASK << S_U_TIMESTAMP_FRACTION_BIT_OFFSET; + public static final long S_O_TIMESTAMP_MILLISECOND_MASK = (long) TEN_BIT_MASK << S_O_TIMESTAMP_FRACTION_BIT_OFFSET; + public static final long S_U_TIMESTAMP_MILLISECOND_MASK = (long) TEN_BIT_MASK << S_U_TIMESTAMP_FRACTION_BIT_OFFSET; + public static final long S_O_TIMESTAMP_SECOND_MASK = (long) SIX_BIT_MASK << S_O_TIMESTAMP_SECOND_BIT_OFFSET; + public static final long S_U_TIMESTAMP_SECOND_MASK = (long) SIX_BIT_MASK << S_U_TIMESTAMP_SECOND_BIT_OFFSET; + public static final long S_O_TIMESTAMP_OFFSET_MASK = (long) SEVEN_BIT_MASK << S_O_TIMESTAMP_OFFSET_BIT_OFFSET; + public static final int S_TIMESTAMP_MINUTE_MASK = SIX_BIT_MASK << S_TIMESTAMP_MINUTE_BIT_OFFSET; + public static final int S_TIMESTAMP_HOUR_MASK = FIVE_BIT_MASK << S_TIMESTAMP_HOUR_BIT_OFFSET; + public static final int S_TIMESTAMP_DAY_MASK = FIVE_BIT_MASK << S_TIMESTAMP_DAY_BIT_OFFSET; + public static final int S_TIMESTAMP_MONTH_MASK = FOUR_BIT_MASK << S_TIMESTAMP_MONTH_BIT_OFFSET; + public static final int S_TIMESTAMP_YEAR_MASK = SEVEN_BIT_MASK; } diff --git a/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java b/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java index 695cd86d72..86fb93f608 100644 --- a/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java +++ b/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java @@ -4,8 +4,6 @@ package com.amazon.ion.impl; import com.amazon.ion.BufferConfiguration; -import com.amazon.ion.Decimal; -import com.amazon.ion.IntegerSize; import 
com.amazon.ion.IonBufferConfiguration; import com.amazon.ion.IonDatagram; import com.amazon.ion.IonException; @@ -34,6 +32,7 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.converter.ConvertWith; import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.ValueSource; @@ -43,7 +42,6 @@ import java.io.IOException; import java.io.InputStream; import java.math.BigDecimal; -import java.math.BigInteger; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; @@ -55,6 +53,8 @@ import java.util.zip.GZIPInputStream; import static com.amazon.ion.BitUtils.bytes; +import static com.amazon.ion.TestUtils.StringToTimestamp; +import static com.amazon.ion.TestUtils.bitStringToByteArray; import static com.amazon.ion.TestUtils.gzippedBytes; import static com.amazon.ion.impl.IonCursorTestUtilities.Expectation; import static com.amazon.ion.impl.IonCursorTestUtilities.ExpectationProvider; @@ -365,6 +365,13 @@ static ExpectationProvider decimalValue(BigD )); } + static ExpectationProvider timestampValue(Timestamp expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("timestamp(%s)", expectedValue), + reader -> assertEquals(expectedValue, reader.timestampValue()) + )); + } + static ExpectationProvider stringValue(String expectedValue) { return consumer -> consumer.accept(new Expectation<>( String.format("string(%s)", expectedValue), @@ -3514,4 +3521,172 @@ public void earlyStepOutNonIncremental(boolean constructFromBytes) throws Except // However, the reader *must* fail if the user requests the next value, because the stream is incomplete. assertThrows(IonException.class, () -> reader.next()); // Unexpected EOF } + + /** + * Checks that the reader reads the expected timestamp value from the given input bits. 
+ */ + private void assertIonTimestampCorrectlyParsed(boolean constructFromBytes, Timestamp expected, String inputBits) throws Exception { + byte[] inputBytes = new TestUtils.BinaryIonAppender(1).append(bitStringToByteArray(inputBits)).toByteArray(); + reader = readerFor(readerBuilder, constructFromBytes, inputBytes); + byteCounter.set(0); + assertSequence( + next(IonType.TIMESTAMP), timestampValue(expected), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @CsvSource({ + // OpCode MYYYYYYY DDDDDMMM mmmHHHHH ssssUmmm ffffffss ffffffff ffffffff ffffffff + "2023-10-15T01:00Z, 01110011 00110101 01111101 00000001 00001000", + "2023-10-15T01:59Z, 01110011 00110101 01111101 01100001 00001111", + "2023-10-15T11:22Z, 01110011 00110101 01111101 11001011 00001010", + "2023-10-15T23:00Z, 01110011 00110101 01111101 00010111 00001000", + "2023-10-15T23:59Z, 01110011 00110101 01111101 01110111 00001111", + "2023-10-15T11:22:00Z, 01110100 00110101 01111101 11001011 00001010 00000000", + "2023-10-15T11:22:33Z, 01110100 00110101 01111101 11001011 00011010 00000010", + "2023-10-15T11:22:59Z, 01110100 00110101 01111101 11001011 10111010 00000011", + "2023-10-15T11:22:33.000Z, 01110101 00110101 01111101 11001011 00011010 00000010 00000000", + "2023-10-15T11:22:33.444Z, 01110101 00110101 01111101 11001011 00011010 11110010 00000110", + "2023-10-15T11:22:33.999Z, 01110101 00110101 01111101 11001011 00011010 10011110 00001111", + "2023-10-15T11:22:33.000000Z, 01110110 00110101 01111101 11001011 00011010 00000010 00000000 00000000", + "2023-10-15T11:22:33.444555Z, 01110110 00110101 01111101 11001011 00011010 00101110 00100010 00011011", + "2023-10-15T11:22:33.999999Z, 01110110 00110101 01111101 11001011 00011010 11111110 00001000 00111101", + "2023-10-15T11:22:33.000000000Z, 01110111 00110101 01111101 11001011 00011010 00000010 00000000 00000000 00000000", + "2023-10-15T11:22:33.444555666Z, 01110111 00110101 01111101 11001011 00011010 01001010 10000110 11111101 01101001", 
+ "2023-10-15T11:22:33.999999999Z, 01110111 00110101 01111101 11001011 00011010 11111110 00100111 01101011 11101110", + }) + public void readTimestampValueWithUtcShortForm(@ConvertWith(StringToTimestamp.class) Timestamp expectedValue, String inputBits) throws Exception { + assertIonTimestampCorrectlyParsed(true, expectedValue, inputBits); + assertIonTimestampCorrectlyParsed(false, expectedValue, inputBits); + } + + @ParameterizedTest + @CsvSource({ + // OpCode MYYYYYYY DDDDDMMM mmmHHHHH ssssUmmm ffffffss ffffffff ffffffff ffffffff + "1970T, 01110000 00000000", + "2023T, 01110000 00110101", + "2097T, 01110000 01111111", + "2023-01T, 01110001 10110101 00000000", + "2023-10T, 01110001 00110101 00000101", + "2023-12T, 01110001 00110101 00000110", + "2023-10-01T, 01110010 00110101 00001101", + "2023-10-15T, 01110010 00110101 01111101", + "2023-10-31T, 01110010 00110101 11111101", + "2023-10-15T01:00-00:00, 01110011 00110101 01111101 00000001 00000000", + "2023-10-15T01:59-00:00, 01110011 00110101 01111101 01100001 00000111", + "2023-10-15T11:22-00:00, 01110011 00110101 01111101 11001011 00000010", + "2023-10-15T23:00-00:00, 01110011 00110101 01111101 00010111 00000000", + "2023-10-15T23:59-00:00, 01110011 00110101 01111101 01110111 00000111", + "2023-10-15T11:22:00-00:00, 01110100 00110101 01111101 11001011 00000010 00000000", + "2023-10-15T11:22:33-00:00, 01110100 00110101 01111101 11001011 00010010 00000010", + "2023-10-15T11:22:59-00:00, 01110100 00110101 01111101 11001011 10110010 00000011", + "2023-10-15T11:22:33.000-00:00, 01110101 00110101 01111101 11001011 00010010 00000010 00000000", + "2023-10-15T11:22:33.444-00:00, 01110101 00110101 01111101 11001011 00010010 11110010 00000110", + "2023-10-15T11:22:33.999-00:00, 01110101 00110101 01111101 11001011 00010010 10011110 00001111", + "2023-10-15T11:22:33.000000-00:00, 01110110 00110101 01111101 11001011 00010010 00000010 00000000 00000000", + "2023-10-15T11:22:33.444555-00:00, 01110110 00110101 01111101 11001011 
00010010 00101110 00100010 00011011", + "2023-10-15T11:22:33.999999-00:00, 01110110 00110101 01111101 11001011 00010010 11111110 00001000 00111101", + "2023-10-15T11:22:33.000000000-00:00, 01110111 00110101 01111101 11001011 00010010 00000010 00000000 00000000 00000000", + "2023-10-15T11:22:33.444555666-00:00, 01110111 00110101 01111101 11001011 00010010 01001010 10000110 11111101 01101001", + "2023-10-15T11:22:33.999999999-00:00, 01110111 00110101 01111101 11001011 00010010 11111110 00100111 01101011 11101110", + }) + public void readTimestampValueWithUnknownOffsetShortForm(@ConvertWith(StringToTimestamp.class) Timestamp expectedValue, String inputBits) throws Exception { + assertIonTimestampCorrectlyParsed(true, expectedValue, inputBits); + assertIonTimestampCorrectlyParsed(false, expectedValue, inputBits); + } + + @ParameterizedTest + @CsvSource({ + // OpCode MYYYYYYY DDDDDMMM mmmHHHHH ooooommm ssssssoo ffffffff ffffffff ffffffff ..ffffff + "2023-10-15T01:00-14:00, 01111000 00110101 01111101 00000001 00000000 00000000", + "2023-10-15T01:00+14:00, 01111000 00110101 01111101 00000001 10000000 00000011", + "2023-10-15T01:00-01:15, 01111000 00110101 01111101 00000001 10011000 00000001", + "2023-10-15T01:00+01:15, 01111000 00110101 01111101 00000001 11101000 00000001", + "2023-10-15T01:59+01:15, 01111000 00110101 01111101 01100001 11101111 00000001", + "2023-10-15T11:22+01:15, 01111000 00110101 01111101 11001011 11101010 00000001", + "2023-10-15T23:00+01:15, 01111000 00110101 01111101 00010111 11101000 00000001", + "2023-10-15T23:59+01:15, 01111000 00110101 01111101 01110111 11101111 00000001", + "2023-10-15T11:22:00+01:15, 01111001 00110101 01111101 11001011 11101010 00000001", + "2023-10-15T11:22:33+01:15, 01111001 00110101 01111101 11001011 11101010 10000101", + "2023-10-15T11:22:59+01:15, 01111001 00110101 01111101 11001011 11101010 11101101", + "2023-10-15T11:22:33.000+01:15, 01111010 00110101 01111101 11001011 11101010 10000101 00000000 00000000", + 
"2023-10-15T11:22:33.444+01:15, 01111010 00110101 01111101 11001011 11101010 10000101 10111100 00000001", + "2023-10-15T11:22:33.999+01:15, 01111010 00110101 01111101 11001011 11101010 10000101 11100111 00000011", + "2023-10-15T11:22:33.000000+01:15, 01111011 00110101 01111101 11001011 11101010 10000101 00000000 00000000 00000000", + "2023-10-15T11:22:33.444555+01:15, 01111011 00110101 01111101 11001011 11101010 10000101 10001011 11001000 00000110", + "2023-10-15T11:22:33.999999+01:15, 01111011 00110101 01111101 11001011 11101010 10000101 00111111 01000010 00001111", + "2023-10-15T11:22:33.000000000+01:15, 01111100 00110101 01111101 11001011 11101010 10000101 00000000 00000000 00000000 00000000", + "2023-10-15T11:22:33.444555666+01:15, 01111100 00110101 01111101 11001011 11101010 10000101 10010010 01100001 01111111 00011010", + "2023-10-15T11:22:33.999999999+01:15, 01111100 00110101 01111101 11001011 11101010 10000101 11111111 11001001 10011010 00111011", + + }) + public void readTimestampValueWithKnownOffsetShortForm(@ConvertWith(StringToTimestamp.class) Timestamp expectedValue, String inputBits) throws Exception { + assertIonTimestampCorrectlyParsed(true, expectedValue, inputBits); + assertIonTimestampCorrectlyParsed(false, expectedValue, inputBits); + } + + @ParameterizedTest + @CsvSource({ + // OpCode Length YYYYYYYY MMYYYYYY HDDDDDMM mmmmHHHH oooooomm ssoooooo ....ssss Coefficient+ Scale + "0001T, 11110111 00000101 00000001 00000000", + "1947T, 11110111 00000101 10011011 00000111", + "9999T, 11110111 00000101 00001111 00100111", + "1947-01T, 11110111 00000111 10011011 01000111 00000000", + "1947-12T, 11110111 00000111 10011011 00000111 00000011", + "1947-01-01T, 11110111 00000111 10011011 01000111 00000100", + "1947-12-23T, 11110111 00000111 10011011 00000111 01011111", + "1947-12-31T, 11110111 00000111 10011011 00000111 01111111", + "1947-12-23T00:00Z, 11110111 00001101 10011011 00000111 01011111 00000000 10000000 00010110", + "1947-12-23T23:59Z, 11110111 
00001101 10011011 00000111 11011111 10111011 10000011 00010110", + "1947-12-23T23:59:00Z, 11110111 00001111 10011011 00000111 11011111 10111011 10000011 00010110 00000000", + "1947-12-23T23:59:59Z, 11110111 00001111 10011011 00000111 11011111 10111011 10000011 11010110 00001110", + "1947-12-23T23:59:00.0Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000001", + "1947-12-23T23:59:00.00Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000010", + "1947-12-23T23:59:00.000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000011", + "1947-12-23T23:59:00.0000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000100", + "1947-12-23T23:59:00.00000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000101", + "1947-12-23T23:59:00.000000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000110", + "1947-12-23T23:59:00.0000000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000111", + "1947-12-23T23:59:00.00000000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00001000", + "1947-12-23T23:59:00.9Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00010011 00000001", + "1947-12-23T23:59:00.99Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11000111 00000010", + "1947-12-23T23:59:00.999Z, 11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 10011110 00001111 00000011", + "1947-12-23T23:59:00.9999Z, 11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00111110 10011100 00000100", + "1947-12-23T23:59:00.99999Z, 11110111 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111100 00110100 
00001100 00000101", + "1947-12-23T23:59:00.999999Z, 11110111 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111100 00010001 01111010 00000110", + "1947-12-23T23:59:00.9999999Z, 11110111 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111000 01100111 10001001 00001001 00000111", + "1947-12-23T23:59:00.99999999Z, 11110111 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111000 00001111 01011110 01011111 00001000", + + "1947-12-23T23:59:00.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000Z, " + + "11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 10001101", + + "1947-12-23T23:59:00.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + + "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + + "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000Z, " + + "11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 01101000 00000001", + + "1947-12-23T23:59:00.999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999Z, " + + "11110111 10010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 " + + "11111100 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 " + + "11111111 10010100 10001001 01111001 01101100 11001110 01111000 11110010 01000000 01111101 10100110 11000111 10101000 01000110 01011001 01110001 01001101 " + + "00100000 11110101 01101110 01111010 00001100 00001001 
11101111 01111111 11110011 00011110 00010100 11010111 01101000 01110111 10101100 01101100 10001110 " + + "00110010 10110111 10000010 11110010 00110110 01101000 11110010 10100111 10001101", + + // Offsets + "2048-01-01T01:01-23:59, 11110111 00001101 00000000 01001000 10000100 00010000 00000100 00000000", + "2048-01-01T01:01-00:02, 11110111 00001101 00000000 01001000 10000100 00010000 01111000 00010110", + "2048-01-01T01:01-00:01, 11110111 00001101 00000000 01001000 10000100 00010000 01111100 00010110", + "2048-01-01T01:01-00:00, 11110111 00001101 00000000 01001000 10000100 00010000 11111100 00111111", + "2048-01-01T01:01+00:00, 11110111 00001101 00000000 01001000 10000100 00010000 10000000 00010110", + "2048-01-01T01:01+00:01, 11110111 00001101 00000000 01001000 10000100 00010000 10000100 00010110", + "2048-01-01T01:01+00:02, 11110111 00001101 00000000 01001000 10000100 00010000 10001000 00010110", + "2048-01-01T01:01+23:59, 11110111 00001101 00000000 01001000 10000100 00010000 11111100 00101100", + }) + public void testWriteTimestampValueLongForm(@ConvertWith(StringToTimestamp.class) Timestamp expectedValue, String inputBits) throws Exception { + assertIonTimestampCorrectlyParsed(true, expectedValue, inputBits); + assertIonTimestampCorrectlyParsed(false, expectedValue, inputBits); + } + } From 067211fbce19bcefa06cde7a37cd7bd7904981bb Mon Sep 17 00:00:00 2001 From: Tyler Gregg Date: Wed, 15 Nov 2023 17:43:06 -0800 Subject: [PATCH 2/5] Adds support for reading binary Ion 1.1 nulls, booleans, ints, and floats. 
--- src/com/amazon/ion/impl/IonCursorBinary.java | 10 + .../impl/IonReaderContinuableCoreBinary.java | 84 ++++- src/com/amazon/ion/impl/IonTypeID.java | 20 ++ test/com/amazon/ion/TestUtils.java | 29 +- ...onReaderContinuableTopLevelBinaryTest.java | 298 +++++++++++++++++- 5 files changed, 423 insertions(+), 18 deletions(-) diff --git a/src/com/amazon/ion/impl/IonCursorBinary.java b/src/com/amazon/ion/impl/IonCursorBinary.java index fe27513c5a..2224d2bc6f 100644 --- a/src/com/amazon/ion/impl/IonCursorBinary.java +++ b/src/com/amazon/ion/impl/IonCursorBinary.java @@ -1262,6 +1262,9 @@ private boolean uncheckedReadHeader(final int typeIdByte, final boolean isAnnota if (endIndex > limit) { isValueIncomplete = true; } + if (minorVersion == 1 && valueTid.isNull && valueTid.length > 0) { + valueTid = IonTypeID.NULL_TYPE_IDS_1_1[buffer[(int)(peekIndex++)] & SINGLE_BYTE_MASK]; + } } markerToSet.typeId = valueTid; if (event == Event.START_CONTAINER) { @@ -1306,6 +1309,13 @@ private boolean slowReadHeader(final int typeIdByte, final boolean isAnnotated, } return true; } + if (minorVersion == 1 && valueTid.isNull && valueTid.length > 0) { + int nullTypeIndex = slowReadByte(); + if (nullTypeIndex < 0) { + return true; + } + valueTid = IonTypeID.NULL_TYPE_IDS_1_1[nullTypeIndex]; + } markerToSet.typeId = valueTid; if (checkpointLocation == CheckpointLocation.AFTER_SCALAR_HEADER) { return true; diff --git a/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java b/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java index 1f06c46c76..22eab75be3 100644 --- a/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java +++ b/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java @@ -475,6 +475,58 @@ private int readVarSym_1_1(Marker marker) { throw new UnsupportedOperationException(); } + /** + * Reads a FixedInt into a long. After this method returns, `peekIndex` points to the first byte after the end of + * the FixedInt. + * @return the value.
+ */ + private long readFixedInt_1_1() { + if (peekIndex >= valueMarker.endIndex) { + return 0; + } + long startIndex = peekIndex; + peekIndex = valueMarker.endIndex; + // Note: the following line performs sign extension via the cast to long without masking with 0xFF. + long value = buffer[(int) --peekIndex]; + while (peekIndex > startIndex) { + value = (value << 8) | (buffer[(int) --peekIndex] & SINGLE_BYTE_MASK); + } + peekIndex = valueMarker.endIndex; + return value; + } + + /** + * Copies a FixedInt into scratch space, converting it to its equivalent big-endian two's complement representation. + * @param startIndex the index of the second byte in the FixedInt representation. + * @param length the number of bytes remaining in the FixedInt representation. + * @return a byte[] (either new or reused) containing the big-endian two's complement representation of the value. + */ + private byte[] copyFixedIntAsTwosComplementBytes(long startIndex, int length) { + // FixedInt is a little-endian two's complement representation. Simply reverse the bytes. + byte[] bytes = getScratchForSize(length); + int copyIndex = bytes.length; + for (int i = 0; i < length; i++) { + bytes[--copyIndex] = buffer[(int) startIndex + i]; + } + peekIndex = startIndex + length; + return bytes; + } + + /** + * Reads a FixedInt value into a BigInteger. + * @param length the length of the value. + * @return the value. + */ + private BigInteger readFixedIntAsBigInteger_1_1(int length) { + BigInteger value; + if (length > 0) { + value = new BigInteger(copyFixedIntAsTwosComplementBytes(peekIndex, length)); + } else { + value = BigInteger.ZERO; + } + return value; + } + private BigDecimal readBigDecimal_1_1() { throw new UnsupportedOperationException(); } @@ -483,12 +535,22 @@ private Decimal readDecimal_1_1() { throw new UnsupportedOperationException(); } + /** + * Reads the FixedInt bounded by `valueMarker` into a `long`. + * @return the value. 
+ */ private long readLong_1_1() { - throw new UnsupportedOperationException(); + peekIndex = valueMarker.startIndex; + return readFixedInt_1_1(); } + /** + * Reads the FixedInt bounded by `valueMarker` into a BigInteger. + * @return the value. + */ private BigInteger readBigInteger_1_1() { - throw new UnsupportedOperationException(); + peekIndex = valueMarker.startIndex; + return readFixedIntAsBigInteger_1_1((int) (valueMarker.endIndex - peekIndex)); } /** @@ -750,8 +812,13 @@ private Timestamp readTimestamp_1_1() { } } + /** + * Reads the boolean value using the type ID of the current value. + * @return the value. + */ private boolean readBoolean_1_1() { - throw new UnsupportedOperationException(); + // Boolean 'true' is 0x5E; 'false' is 0x5F. + return valueTid.lowerNibble == 0xE; } @Override @@ -823,15 +890,16 @@ public IntegerSize getIntegerSize() { return null; } prepareScalar(); - if (valueTid.length < 0) { + int length = valueTid.variableLength ? ((int) (valueMarker.endIndex - valueMarker.startIndex)) : valueTid.length; + if (length < 0) { return IntegerSize.BIG_INTEGER; - } else if (valueTid.length < INT_SIZE_IN_BYTES) { + } else if (length < INT_SIZE_IN_BYTES) { return IntegerSize.INT; - } else if (valueTid.length == INT_SIZE_IN_BYTES) { + } else if (length == INT_SIZE_IN_BYTES) { return (minorVersion != 0 || classifyInteger_1_0()) ? IntegerSize.INT : IntegerSize.LONG; - } else if (valueTid.length < LONG_SIZE_IN_BYTES) { + } else if (length < LONG_SIZE_IN_BYTES) { return IntegerSize.LONG; - } else if (valueTid.length == LONG_SIZE_IN_BYTES) { + } else if (length == LONG_SIZE_IN_BYTES) { return (minorVersion != 0 || classifyInteger_1_0()) ? 
IntegerSize.LONG : IntegerSize.BIG_INTEGER; } return IntegerSize.BIG_INTEGER; diff --git a/src/com/amazon/ion/impl/IonTypeID.java b/src/com/amazon/ion/impl/IonTypeID.java index 35332041e7..d6fc6834ad 100644 --- a/src/com/amazon/ion/impl/IonTypeID.java +++ b/src/com/amazon/ion/impl/IonTypeID.java @@ -75,6 +75,7 @@ final class IonTypeID { static final IonTypeID[] TYPE_IDS_NO_IVM; static final IonTypeID[] TYPE_IDS_1_0; static final IonTypeID[] TYPE_IDS_1_1; + static final IonTypeID[] NULL_TYPE_IDS_1_1; static { TYPE_IDS_NO_IVM = new IonTypeID[NUMBER_OF_BYTES]; TYPE_IDS_1_0 = new IonTypeID[NUMBER_OF_BYTES]; @@ -84,6 +85,24 @@ final class IonTypeID { TYPE_IDS_1_0[b] = new IonTypeID((byte) b, 0); TYPE_IDS_1_1[b] = new IonTypeID((byte) b, 1); } + // In Ion 1.1, typed nulls are represented by the type ID 0xEB followed by a 1-byte UInt indicating the type. + // Therefore, the type of the typed null cannot be precomputed in Ion 1.1. In order to avoid adding more hot + // path branching to the reader, we create IonTypeIDs that mimic precomputed typed nulls in Ion 1.1 by reusing + // the typed null type IDs from Ion 1.0. When the type of the typed null is determined, the reader's current + // IonTypeID will be replaced with one of these. The index is the one-byte value that follows 0xEB. 
+ NULL_TYPE_IDS_1_1 = new IonTypeID[12]; + NULL_TYPE_IDS_1_1[0x0] = TYPE_IDS_1_0[0x1F]; // null.bool + NULL_TYPE_IDS_1_1[0x1] = TYPE_IDS_1_0[0x2F]; // null.int + NULL_TYPE_IDS_1_1[0x2] = TYPE_IDS_1_0[0x4F]; // null.float + NULL_TYPE_IDS_1_1[0x3] = TYPE_IDS_1_0[0x5F]; // null.decimal + NULL_TYPE_IDS_1_1[0x4] = TYPE_IDS_1_0[0x6F]; // null.timestamp + NULL_TYPE_IDS_1_1[0x5] = TYPE_IDS_1_0[0x8F]; // null.string + NULL_TYPE_IDS_1_1[0x6] = TYPE_IDS_1_0[0x7F]; // null.symbol + NULL_TYPE_IDS_1_1[0x7] = TYPE_IDS_1_0[0xAF]; // null.blob + NULL_TYPE_IDS_1_1[0x8] = TYPE_IDS_1_0[0x9F]; // null.clob + NULL_TYPE_IDS_1_1[0x9] = TYPE_IDS_1_0[0xBF]; // null.list + NULL_TYPE_IDS_1_1[0xA] = TYPE_IDS_1_0[0xCF]; // null.sexp + NULL_TYPE_IDS_1_1[0xB] = TYPE_IDS_1_0[0xDF]; // null.struct } final IonType type; @@ -259,6 +278,7 @@ private IonTypeID(byte id, int minorVersion) { // Typed null. Type byte follows. type = null; isNull = true; + length = 1; } else if (lowerNibble <= 0xD) { isNopPad = true; type = null; diff --git a/test/com/amazon/ion/TestUtils.java b/test/com/amazon/ion/TestUtils.java index 80292a6b5d..100eac99fa 100644 --- a/test/com/amazon/ion/TestUtils.java +++ b/test/com/amazon/ion/TestUtils.java @@ -631,6 +631,28 @@ public static int byteLengthFromHexString(String hexString) { return (hexString.replaceAll("[^\\dA-F]", "").length()) / 2; } + /** + * Converts a string of octets in the given radix to a byte array. Octets must be separated by a space. + * @param octetString the string of space-separated octets. + * @param radix the radix of the octets in the string. + * @return a new byte array. 
+ */ + private static byte[] octetStringToByteArray(String octetString, int radix) { + String[] bytesAsStrings = octetString.split(" "); + byte[] bytesAsBytes = new byte[bytesAsStrings.length]; + for (int i = 0; i < bytesAsBytes.length; i++) { + bytesAsBytes[i] = (byte) (Integer.parseInt(bytesAsStrings[i], radix) & 0xFF); + } + return bytesAsBytes; + } + + /** + * Converts a string of hex octets, such as "BE EF", to a byte array. + */ + public static byte[] hexStringToByteArray(String hexString) { + return octetStringToByteArray(hexString, 16); + } + /** * Converts a byte array to a string of bits, such as "00110110 10001001". * The purpose of this method is to make it easier to read and write test assertions. @@ -661,12 +683,7 @@ public static int byteLengthFromBitString(String bitString) { * Converts a string of bits, such as "00110110 10001001", to a byte array. */ public static byte[] bitStringToByteArray(String bitString) { - String[] bytesAsBits = bitString.split(" "); - byte[] bytesAsBytes = new byte[bytesAsBits.length]; - for (int i = 0; i < bytesAsBytes.length; i++) { - bytesAsBytes[i] = (byte) (Integer.parseInt(bytesAsBits[i], 2) & 0xFF); - } - return bytesAsBytes; + return octetStringToByteArray(bitString, 2); } /** diff --git a/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java b/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java index 86fb93f608..80cb429e66 100644 --- a/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java +++ b/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java @@ -4,6 +4,7 @@ package com.amazon.ion.impl; import com.amazon.ion.BufferConfiguration; +import com.amazon.ion.IntegerSize; import com.amazon.ion.IonBufferConfiguration; import com.amazon.ion.IonDatagram; import com.amazon.ion.IonException; @@ -42,6 +43,7 @@ import java.io.IOException; import java.io.InputStream; import java.math.BigDecimal; +import java.math.BigInteger; import 
java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; @@ -56,6 +58,7 @@ import static com.amazon.ion.TestUtils.StringToTimestamp; import static com.amazon.ion.TestUtils.bitStringToByteArray; import static com.amazon.ion.TestUtils.gzippedBytes; +import static com.amazon.ion.TestUtils.hexStringToByteArray; import static com.amazon.ion.impl.IonCursorTestUtilities.Expectation; import static com.amazon.ion.impl.IonCursorTestUtilities.ExpectationProvider; import static com.amazon.ion.impl.IonCursorTestUtilities.type; @@ -344,10 +347,47 @@ static ExpectationProvider container(IonType }; } + static ExpectationProvider nullValue(IonType expectedType) { + return consumer -> consumer.accept(new Expectation<>( + String.format("null(%s)", expectedType), + reader -> { + assertTrue(reader.isNullValue()); + assertEquals(expectedType, reader.getType()); + } + )); + } + + static ExpectationProvider booleanValue(boolean expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("boolean(%s)", expectedValue), + reader -> assertEquals(expectedValue, reader.booleanValue()) + )); + } + static ExpectationProvider intValue(int expectedValue) { return consumer -> consumer.accept(new Expectation<>( String.format("int(%d)", expectedValue), - reader -> assertEquals(expectedValue, reader.intValue()) + reader -> { + assertEquals(IntegerSize.INT, reader.getIntegerSize()); + assertEquals(expectedValue, reader.intValue()); + } + )); + } + + static ExpectationProvider longValue(long expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("long(%d)", expectedValue), + reader -> { + assertTrue(reader.getIntegerSize().ordinal() <= IntegerSize.LONG.ordinal()); + assertEquals(expectedValue, reader.longValue()); + } + )); + } + + static ExpectationProvider bigIntegerValue(BigInteger expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("bigInteger(%s)", expectedValue), + 
reader -> assertEquals(expectedValue, reader.bigIntegerValue()) )); } @@ -3523,12 +3563,262 @@ public void earlyStepOutNonIncremental(boolean constructFromBytes) throws Except } /** - * Checks that the reader reads the expected timestamp value from the given input bits. + * Creates an IonReader over the given data, which will be prepended with a binary Ion 1.1 IVM. + * @param data the data to read. + * @param constructFromBytes whether to construct the reader from bytes or an InputStream. + * @return a new reader. */ - private void assertIonTimestampCorrectlyParsed(boolean constructFromBytes, Timestamp expected, String inputBits) throws Exception { - byte[] inputBytes = new TestUtils.BinaryIonAppender(1).append(bitStringToByteArray(inputBits)).toByteArray(); + private IonReader readerForIon11(byte[] data, boolean constructFromBytes) throws Exception { + byte[] inputBytes = new TestUtils.BinaryIonAppender(1).append(data).toByteArray(); reader = readerFor(readerBuilder, constructFromBytes, inputBytes); byteCounter.set(0); + return reader; + } + + /** + * Checks that the reader reads a null value of the expected type from the given input bytes. 
+ */ + private void assertNullCorrectlyParsed(boolean constructFromBytes, IonType expectedType, String inputBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + assertSequence( + next(expectedType), nullValue(expectedType), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @CsvSource({ + " NULL, EA", + " BOOL, EB 00", + " INT, EB 01", + " FLOAT, EB 02", + " DECIMAL, EB 03", + "TIMESTAMP, EB 04", + " STRING, EB 05", + " SYMBOL, EB 06", + " BLOB, EB 07", + " CLOB, EB 08", + " LIST, EB 09", + " SEXP, EB 0A", + " STRUCT, EB 0B", + }) + public void readNullValue(IonType expectedType, String inputBytes) throws Exception { + assertNullCorrectlyParsed(true, expectedType, inputBytes); + assertNullCorrectlyParsed(false, expectedType, inputBytes); + } + + /** + * Checks that the reader reads the expected boolean from the given input bits. + */ + private void assertBooleanCorrectlyParsed(boolean constructFromBytes, boolean expectedValue, String inputBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + assertSequence( + next(IonType.BOOL), booleanValue(expectedValue), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @CsvSource({ + "true, 5E", + "false, 5F", + }) + public void readBooleanValue(Boolean expectedValue, String inputBytes) throws Exception { + assertBooleanCorrectlyParsed(true, expectedValue, inputBytes); + assertBooleanCorrectlyParsed(false, expectedValue, inputBytes); + } + + /** + * Checks that the reader reads the expected int from the given input bits. 
+ */ + private void assertIntCorrectlyParsed(boolean constructFromBytes, int expectedValue, String inputBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + assertSequence( + next(IonType.INT), intValue(expectedValue), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @CsvSource({ + " 0, 50", + " 1, 51 01", + " 17, 51 11", + " 127, 51 7F", + " 128, 52 80 00", + " 5555, 52 B3 15", + " 32767, 52 FF 7F", + " 32768, 53 00 80 00", + " 292037, 53 C5 74 04", + " 321672342, 54 96 54 2C 13", + " 2147483647, 54 FF FF FF 7F", // Integer.MAX_VALUE + " -1, 51 FF", + " -2, 51 FE", + " -14, 51 F2", + " -128, 51 80", + " -129, 52 7F FF", + " -944, 52 50 FC", + " -32768, 52 00 80", + " -32769, 53 FF 7F FF", + " -8388608, 53 00 00 80", + " -8388609, 54 FF FF 7F FF", + " -2147483648, 54 00 00 00 80", // Integer.MIN_VALUE + }) + public void readIntValue(int expectedValue, String inputBytes) throws Exception { + assertIntCorrectlyParsed(true, expectedValue, inputBytes); + assertIntCorrectlyParsed(false, expectedValue, inputBytes); + } + + /** + * Checks that the reader reads the expected long from the given input bits. 
+ */ + private void assertLongCorrectlyParsed(boolean constructFromBytes, long expectedValue, String inputBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + assertSequence( + next(IonType.INT), longValue(expectedValue), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @CsvSource({ + " 0, 50", + " 1, 51 01", + " 17, 51 11", + " 127, 51 7F", + " 128, 52 80 00", + " 5555, 52 B3 15", + " 32767, 52 FF 7F", + " 32768, 53 00 80 00", + " 292037, 53 C5 74 04", + " 321672342, 54 96 54 2C 13", + " 2147483647, 54 FF FF FF 7F", // Integer.MAX_VALUE + " 64121672342, 55 96 12 F3 ED 0E", + " 1274120283167, 56 1F A4 7C A7 28 01", + " 851274120283167, 57 1F C4 8B B3 3A 06 03", + " 72624976668147840, 58 80 40 20 10 08 04 02 01", + " 9223372036854775807, 58 FF FF FF FF FF FF FF 7F", // Long.MAX_VALUE + " -1, 51 FF", + " -2, 51 FE", + " -14, 51 F2", + " -128, 51 80", + " -129, 52 7F FF", + " -944, 52 50 FC", + " -32768, 52 00 80", + " -32769, 53 FF 7F FF", + " -8388608, 53 00 00 80", + " -8388609, 54 FF FF 7F FF", + " -2147483648, 54 00 00 00 80", // Integer.MIN_VALUE + " -72624976668147841, 58 7F BF DF EF F7 FB FD FE", + " -9223372036854775808, 58 00 00 00 00 00 00 00 80", // Long.MIN_VALUE + }) + public void readLongValue(long expectedValue, String inputBytes) throws Exception { + assertLongCorrectlyParsed(true, expectedValue, inputBytes); + assertLongCorrectlyParsed(false, expectedValue, inputBytes); + } + + /** + * Checks that the reader reads the expected BigInteger from the given input bits. 
+ */ + private void assertBigIntegerCorrectlyParsed(boolean constructFromBytes, BigInteger expectedValue, String inputBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + assertSequence( + next(IonType.INT), bigIntegerValue(expectedValue), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @CsvSource({ + " 0, 50", + " 1, 51 01", + " 17, 51 11", + " 127, 51 7F", + " 128, 52 80 00", + " 5555, 52 B3 15", + " 32767, 52 FF 7F", + " 32768, 53 00 80 00", + " 292037, 53 C5 74 04", + " 321672342, 54 96 54 2C 13", + " 2147483647, 54 FF FF FF 7F", // Integer.MAX_VALUE + " 64121672342, 55 96 12 F3 ED 0E", + " 1274120283167, 56 1F A4 7C A7 28 01", + " 851274120283167, 57 1F C4 8B B3 3A 06 03", + " 72624976668147840, 58 80 40 20 10 08 04 02 01", + " 9223372036854775807, 58 FF FF FF FF FF FF FF 7F", // Long.MAX_VALUE + " 9223372036854775808, F5 13 00 00 00 00 00 00 00 80 00", + "999999999999999999999999999999, F5 1B FF FF FF 3F EA ED 74 46 D0 9C 2C 9F 0C", + " -1, 51 FF", + " -2, 51 FE", + " -14, 51 F2", + " -128, 51 80", + " -129, 52 7F FF", + " -944, 52 50 FC", + " -32768, 52 00 80", + " -32769, 53 FF 7F FF", + " -8388608, 53 00 00 80", + " -8388609, 54 FF FF 7F FF", + " -2147483648, 54 00 00 00 80", // Integer.MIN_VALUE + " -72624976668147841, 58 7F BF DF EF F7 FB FD FE", + " -9223372036854775808, 58 00 00 00 00 00 00 00 80", // Long.MIN_VALUE + " -9223372036854775809, F5 13 FF FF FF FF FF FF FF 7F FF", + "-99999999999999999999999999999, F5 1B 01 00 00 60 35 E8 8D 92 51 F0 E1 BC FE", + }) + public void readBigIntegerValue(BigInteger expectedValue, String inputBytes) throws Exception { + assertBigIntegerCorrectlyParsed(true, expectedValue, inputBytes); + assertBigIntegerCorrectlyParsed(false, expectedValue, inputBytes); + } + + /** + * Checks that the reader reads the expected double from the given input bits. 
+ */ + private void assertDoubleCorrectlyParsed(boolean constructFromBytes, double expectedValue, String inputBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + assertSequence( + next(IonType.FLOAT), doubleValue(expectedValue), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @CsvSource({ + " 0.0, 5A", + " 1.0, 5C 3F 80 00 00", + " 1.5, 5C 3F C0 00 00", + " 3.141592653589793, 5D 40 09 21 FB 54 44 2D 18", + " 4.00537109375, 5C 40 80 2C 00", + " 4.11111111111, 5D 40 10 71 C7 1C 71 C2 39", + " 423542.09375, 5C 48 CE CE C3", + " 8236423542.09375, 5D 41 FE AE DD 97 61 80 00", + " 1.79769313486231570e+308, 5D 7F EF FF FF FF FF FF FF", // Double.MAX_VALUE + " -1.0, 5C BF 80 00 00", + " -1.5, 5C BF C0 00 00", + " -3.141592653589793, 5D C0 09 21 FB 54 44 2D 18", + " -4.00537109375, 5C C0 80 2C 00", + " -4.11111111111, 5D C0 10 71 C7 1C 71 C2 39", + " -423542.09375, 5C C8 CE CE C3", + " -8236423542.09375, 5D C1 FE AE DD 97 61 80 00", + "-1.79769313486231570e+308, 5D FF EF FF FF FF FF FF FF", // -Double.MAX_VALUE + " NaN, 5C 7F C0 00 00", + " Infinity, 5C 7F 80 00 00", + " -Infinity, 5C FF 80 00 00", + }) + public void readDoubleValue(double expectedValue, String inputBytes) throws Exception { + assertDoubleCorrectlyParsed(true, expectedValue, inputBytes); + assertDoubleCorrectlyParsed(false, expectedValue, inputBytes); + } + + /** + * Checks that the reader reads the expected timestamp value from the given input bits.
+ */ + private void assertIonTimestampCorrectlyParsed(boolean constructFromBytes, Timestamp expected, String inputBits) throws Exception { + reader = readerForIon11(bitStringToByteArray(inputBits), constructFromBytes); assertSequence( next(IonType.TIMESTAMP), timestampValue(expected), next(null) From 5a872fd3dc1e98a31e77656e00088a8e6b58828f Mon Sep 17 00:00:00 2001 From: Tyler Gregg Date: Thu, 16 Nov 2023 15:39:04 -0800 Subject: [PATCH 3/5] Improves the readability of IonTypeID and makes other minor changes to address PR feedback. --- .../impl/IonReaderContinuableCoreBinary.java | 12 +-- src/com/amazon/ion/impl/IonTypeID.java | 99 ++++++++++--------- .../ion/impl/bin/Ion_1_1_Constants.java | 2 +- src/com/amazon/ion/impl/bin/OpCodes.java | 14 ++- 4 files changed, 69 insertions(+), 58 deletions(-) diff --git a/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java b/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java index 22eab75be3..161c261b17 100644 --- a/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java +++ b/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java @@ -376,8 +376,8 @@ private Timestamp readTimestamp_1_0() { offset = readVarInt_1_0(firstByte); } int year = readVarUInt_1_0(); - int month = 0; - int day = 0; + int month = 1; + int day = 1; int hour = 0; int minute = 0; int second = 0; @@ -642,8 +642,8 @@ private BigDecimal readTimestampFraction_1_1() { */ private Timestamp readTimestampLongForm_1_1() { int year; - int month = 0; - int day = 0; + int month = 1; + int day = 1; int hour = 0; int minute = 0; int second = 0; @@ -713,8 +713,8 @@ private Timestamp readTimestamp_1_1() { } Timestamp.Precision precision = S_TIMESTAMP_PRECISION_FOR_TYPE_ID_OFFSET[valueTid.lowerNibble]; int year = 0; - int month = 0; - int day = 0; + int month = 1; + int day = 1; int hour = 0; int minute = 0; int second = 0; diff --git a/src/com/amazon/ion/impl/IonTypeID.java b/src/com/amazon/ion/impl/IonTypeID.java index d6fc6834ad..05f6ca9b83 100644 
--- a/src/com/amazon/ion/impl/IonTypeID.java +++ b/src/com/amazon/ion/impl/IonTypeID.java @@ -5,6 +5,8 @@ import com.amazon.ion.IonType; +import static com.amazon.ion.impl.bin.OpCodes.*; + /** * Holds pre-computed information about a binary Ion type ID byte. */ @@ -200,10 +202,9 @@ private IonTypeID(byte id, int minorVersion) { byte upperNibble = (byte) ((id >> BITS_PER_NIBBLE) & LOW_NIBBLE_BITMASK); // For 0xF0 (delimited end byte) the entire byte is included. This avoids having to create a separate field // just to identify this byte. - lowerNibble = (id == (byte) 0xF0) ? (byte) 0xF0 : (byte) (id & LOW_NIBBLE_BITMASK); + lowerNibble = (id == DELIMITED_END_MARKER) ? DELIMITED_END_MARKER : (byte) (id & LOW_NIBBLE_BITMASK); isNegativeInt = false; // Not applicable for Ion 1.1; sign is conveyed by the representation. - // 0xF4 is a length-prefixed macro invocation; 0xEF is a system macro invocation. - isMacroInvocation = upperNibble <= 0x4 || id == (byte) 0xF4 || id == (byte) 0xEF; + isMacroInvocation = upperNibble <= 0x4 || id == LENGTH_PREFIXED_MACRO_INVOCATION || id == SYSTEM_MACRO_INVOCATION; boolean isNopPad = false; boolean isNull = false; int length = -1; @@ -227,17 +228,19 @@ private IonTypeID(byte id, int minorVersion) { macroId = -1; variableLength = (upperNibble == 0xF && lowerNibble >= 0x4) // Variable length, all types. - || (upperNibble == 0x6 && lowerNibble == 0xF) // Decimal with negative-zero coefficient. - || (upperNibble == 0xE && lowerNibble == 0x6) // Variable length annotation SIDs. - || (upperNibble == 0xE && lowerNibble == 0x9) // Variable length annotation FlexSyms. - || (upperNibble == 0xE && lowerNibble == 0xD); // Variable length NOP. + || id == POSITIVE_ZERO_DECIMAL + || id == ANNOTATIONS_MANY_SYMBOL_ADDRESS + || id == ANNOTATIONS_MANY_FLEX_SYM + || id == VARIABLE_LENGTH_NOP; isInlineable = // struct with VarSym field names. 
(upperNibble == 0xD && lowerNibble >= 0x2) - // Delimited struct, variable-length symbol, variable-length struct with FlexSym field names. - || (upperNibble == 0xF && (lowerNibble == 0x3 || lowerNibble == 0x9 || lowerNibble == 0xD)) - // Annotation wrappers with VarSyms. - || (upperNibble == 0xE && lowerNibble >= 0x7 && lowerNibble <= 9) + || id == DELIMITED_STRUCT + || id == VARIABLE_LENGTH_INLINE_SYMBOL + || id == VARIABLE_LENGTH_STRUCT_WITH_FLEXSYMS + || id == ANNOTATIONS_1_FLEX_SYM + || id == ANNOTATIONS_2_FLEX_SYM + || id == ANNOTATIONS_MANY_FLEX_SYM // Symbol values with inline text. || upperNibble == 0x9; IonType typeFromUpperNibble = BINARY_TOKEN_TYPES_1_1[upperNibble]; @@ -248,38 +251,37 @@ private IonTypeID(byte id, int minorVersion) { if (lowerNibble <= 0x8) { type = IonType.INT; length = lowerNibble; - } else if (lowerNibble >= 0xE) { + } else if (id == BOOLEAN_TRUE || id == BOOLEAN_FALSE) { type = IonType.BOOL; length = 0; } else { type = IonType.FLOAT; - if (lowerNibble == 0xA) { + if (id == FLOAT_ZERO_LENGTH) { length = 0; // 0e0 - } else if (lowerNibble == 0xB) { + } else if (id == FLOAT_16) { length = 2; - } else if (lowerNibble == 0xC) { + } else if (id == FLOAT_32) { length = 4; - } else if (lowerNibble == 0xD) { + } else if (id == FLOAT_64) { length = 8; } } } else if (upperNibble == 0xE) { - if (lowerNibble <= 0x3) { + if (id == SYMBOL_ADDRESS_1_BYTE || id == SYMBOL_ADDRESS_2_BYTES || id == SYMBOL_ADDRESS_MANY_BYTES) { type = IonType.SYMBOL; - length = lowerNibble == 0x3 ? -1 : lowerNibble; + length = id == SYMBOL_ADDRESS_MANY_BYTES ? -1 : lowerNibble; } else if (lowerNibble <= 0x9) { type = ION_TYPE_ANNOTATION_WRAPPER; - } else if (lowerNibble == 0xA) { - // null.null + } else if (id == NULL_UNTYPED) { type = IonType.NULL; isNull = true; length = 0; - } else if (lowerNibble == 0xB) { + } else if (id == NULL_TYPED) { // Typed null. Type byte follows. 
type = null; isNull = true; length = 1; - } else if (lowerNibble <= 0xD) { + } else if (id == ONE_BYTE_NOP || id == VARIABLE_LENGTH_NOP) { isNopPad = true; type = null; length = variableLength ? -1 : 0; @@ -288,29 +290,28 @@ private IonTypeID(byte id, int minorVersion) { type = null; } } else { // 0xF - if (lowerNibble == 0) { - // Delimited end + if (id == DELIMITED_END_MARKER) { type = null; length = 0; - } else if (lowerNibble == 0x3 || lowerNibble == 0xC || lowerNibble == 0xD) { + } else if (id == DELIMITED_STRUCT || id == VARIABLE_LENGTH_STRUCT_WITH_SIDS || id == VARIABLE_LENGTH_STRUCT_WITH_FLEXSYMS) { type = IonType.STRUCT; - } else if (lowerNibble == 0x5) { + } else if (id == VARIABLE_LENGTH_INTEGER) { type = IonType.INT; - } else if (lowerNibble == 0x6) { + } else if (id == VARIABLE_LENGTH_DECIMAL) { type = IonType.DECIMAL; - } else if (lowerNibble == 0x7) { + } else if (id == VARIABLE_LENGTH_TIMESTAMP) { type = IonType.TIMESTAMP; - } else if (lowerNibble == 0x9) { + } else if (id == VARIABLE_LENGTH_INLINE_SYMBOL) { type = IonType.SYMBOL; - } else if (lowerNibble == 0x8) { + } else if (id == VARIABLE_LENGTH_STRING) { type = IonType.STRING; - } else if (lowerNibble == 0xE) { + } else if (id == VARIABLE_LENGTH_BLOB) { type = IonType.BLOB; - } else if (lowerNibble == 0xF) { + } else if (id == VARIABLE_LENGTH_CLOB) { type = IonType.CLOB; - } else if (lowerNibble == 0x1 || lowerNibble == 0xA) { + } else if (id == DELIMITED_LIST || id == VARIABLE_LENGTH_LIST) { type = IonType.LIST; - } else if (lowerNibble == 0x2 || lowerNibble == 0xB) { + } else if (id == DELIMITED_SEXP || id == VARIABLE_LENGTH_SEXP) { type = IonType.SEXP; } else { // 0x4 // Variable length macro invocation @@ -322,34 +323,34 @@ private IonTypeID(byte id, int minorVersion) { if (type == IonType.TIMESTAMP) { // Short-form timestamps. Long-form timestamps use the upper nibble 0xF, forcing them to take // the previous branch. 
- switch (lowerNibble) { - case 0x0: + switch (id) { + case TIMESTAMP_YEAR_PRECISION: length = 1; break; - case 0x1: - case 0x2: + case TIMESTAMP_MONTH_PRECISION: + case TIMESTAMP_DAY_PRECISION: length = 2; break; - case 0x3: + case TIMESTAMP_MINUTE_PRECISION: length = 4; break; - case 0x4: - case 0x8: - case 0x9: + case TIMESTAMP_SECOND_PRECISION: + case TIMESTAMP_MINUTE_PRECISION_WITH_OFFSET: + case TIMESTAMP_SECOND_PRECISION_WITH_OFFSET: length = 5; break; - case 0x5: + case TIMESTAMP_MILLIS_PRECISION: length = 6; break; - case 0x6: - case 0xA: + case TIMESTAMP_MICROS_PRECISION: + case TIMESTAMP_MILLIS_PRECISION_WITH_OFFSET: length = 7; break; - case 0x7: - case 0xB: + case TIMESTAMP_NANOS_PRECISION: + case TIMESTAMP_MICROS_PRECISION_WITH_OFFSET: length = 8; break; - case 0xC: + case TIMESTAMP_NANOS_PRECISION_WITH_OFFSET: length = 9; break; } @@ -359,7 +360,7 @@ private IonTypeID(byte id, int minorVersion) { } } } - isDelimited = upperNibble == 0xF && lowerNibble <= 0x3; + isDelimited = id == DELIMITED_LIST || id == DELIMITED_SEXP || id == DELIMITED_STRUCT; this.isNopPad = isNopPad; this.isNull = isNull; this.length = length; diff --git a/src/com/amazon/ion/impl/bin/Ion_1_1_Constants.java b/src/com/amazon/ion/impl/bin/Ion_1_1_Constants.java index bbac4ef5ed..21dce227ad 100644 --- a/src/com/amazon/ion/impl/bin/Ion_1_1_Constants.java +++ b/src/com/amazon/ion/impl/bin/Ion_1_1_Constants.java @@ -14,7 +14,7 @@ private Ion_1_1_Constants() {} public static final int NANOSECOND_SCALE = 9; public static final int MAX_MICROSECONDS = 999999; public static final int MICROSECOND_SCALE = 6; - public static final short MAX_MILLISECONDS = 999; + public static final int MAX_MILLISECONDS = 999; public static final int MILLISECOND_SCALE = 3; //////// Timestamp Field Constants //////// diff --git a/src/com/amazon/ion/impl/bin/OpCodes.java b/src/com/amazon/ion/impl/bin/OpCodes.java index a84c4bc853..1bcda996ef 100644 --- a/src/com/amazon/ion/impl/bin/OpCodes.java +++ 
b/src/com/amazon/ion/impl/bin/OpCodes.java @@ -50,15 +50,25 @@ private OpCodes() {} public static final byte ANNOTATIONS_MANY_FLEX_SYM = (byte) 0xE9; public static final byte NULL_UNTYPED = (byte) 0xEA; public static final byte NULL_TYPED = (byte) 0xEB; - // 0xEC, 0xED NOP + public static final byte ONE_BYTE_NOP = (byte) 0xEC; + public static final byte VARIABLE_LENGTH_NOP = (byte) 0xED; // 0xEE Reserved - // 0xEF System Macro Invocation + public static final byte SYSTEM_MACRO_INVOCATION = (byte) 0xEF; + public static final byte DELIMITED_END_MARKER = (byte) 0xF0; + public static final byte DELIMITED_LIST = (byte) 0xF1; + public static final byte DELIMITED_SEXP = (byte) 0xF2; + public static final byte DELIMITED_STRUCT = (byte) 0xF3; + public static final byte LENGTH_PREFIXED_MACRO_INVOCATION = (byte) 0xF4; public static final byte VARIABLE_LENGTH_INTEGER = (byte) 0xF5; public static final byte VARIABLE_LENGTH_DECIMAL = (byte) 0xF6; public static final byte VARIABLE_LENGTH_TIMESTAMP = (byte) 0xF7; public static final byte VARIABLE_LENGTH_STRING = (byte) 0xF8; public static final byte VARIABLE_LENGTH_INLINE_SYMBOL = (byte) 0xF9; + public static final byte VARIABLE_LENGTH_LIST = (byte) 0xFA; + public static final byte VARIABLE_LENGTH_SEXP = (byte) 0xFB; + public static final byte VARIABLE_LENGTH_STRUCT_WITH_SIDS = (byte) 0xFC; + public static final byte VARIABLE_LENGTH_STRUCT_WITH_FLEXSYMS = (byte) 0xFD; public static final byte VARIABLE_LENGTH_BLOB = (byte) 0xFE; public static final byte VARIABLE_LENGTH_CLOB = (byte) 0xFF; } From 7c870c6ef648448e26a0cf5b0f54ac63a0f3278e Mon Sep 17 00:00:00 2001 From: Tyler Gregg Date: Thu, 16 Nov 2023 17:21:40 -0800 Subject: [PATCH 4/5] Updates read support for binary Ion 1.1 timestamps. 
--- .../impl/IonReaderContinuableCoreBinary.java | 110 ++++++------------ ...onReaderContinuableTopLevelBinaryTest.java | 54 +++++---- 2 files changed, 65 insertions(+), 99 deletions(-) diff --git a/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java b/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java index 161c261b17..471ead7e13 100644 --- a/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java +++ b/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java @@ -496,31 +496,38 @@ private long readFixedInt_1_1() { } /** - * Copies a FixedInt into scratch space, converting it to its equivalent big-endian two's complement representation. - * @param startIndex the index of the second byte in the FixedInt representation. - * @param length the number of bytes remaining in the FixedInt representation. + * Copies a FixedInt or FixedUInt into scratch space, converting it to its equivalent big-endian two's complement + * representation. If the provided length is longer than the actual length of the value, the most significant + * byte in the two's complement representation will be zero. + * @param startIndex the index of the second byte in the FixedInt or FixedUInt representation. + * @param length the number of bytes remaining in the FixedInt or FixedUInt representation. * @return a byte[] (either new or reused) containing the big-endian two's complement representation of the value. */ - private byte[] copyFixedIntAsTwosComplementBytes(long startIndex, int length) { + private byte[] copyFixedIntOrFixedUIntAsTwosComplementBytes(long startIndex, int length) { // FixedInt is a little-endian two's complement representation. Simply reverse the bytes. byte[] bytes = getScratchForSize(length); + // Clear the most significant byte in case the scratch space is padded to accommodate an unsigned value with + // its highest bit set. 
+ bytes[0] = 0; int copyIndex = bytes.length; - for (int i = 0; i < length; i++) { - bytes[--copyIndex] = buffer[(int) startIndex + i]; + for (long i = startIndex; i < valueMarker.endIndex; i++) { + bytes[--copyIndex] = buffer[(int) i]; } - peekIndex = startIndex + length; + peekIndex = valueMarker.endIndex; return bytes; } /** - * Reads a FixedInt value into a BigInteger. - * @param length the length of the value. + * Reads a FixedInt or FixedUInt value into a BigInteger. + * @param length the length of the two's complement representation of the value. For FixedInts, this is always + * equal to the length of the value; for FixedUInts, this is one byte larger than the length of the + * value if the highest bit in the unsigned representation is set. * @return the value. */ - private BigInteger readFixedIntAsBigInteger_1_1(int length) { + private BigInteger readFixedIntOrFixedUIntAsBigInteger_1_1(int length) { BigInteger value; if (length > 0) { - value = new BigInteger(copyFixedIntAsTwosComplementBytes(peekIndex, length)); + value = new BigInteger(copyFixedIntOrFixedUIntAsTwosComplementBytes(peekIndex, length)); } else { value = BigInteger.ZERO; } @@ -550,64 +557,7 @@ private long readLong_1_1() { */ private BigInteger readBigInteger_1_1() { peekIndex = valueMarker.startIndex; - return readFixedIntAsBigInteger_1_1((int) (valueMarker.endIndex - peekIndex)); - } - - /** - * Copies a FlexUInt into scratch space, converting it to its equivalent big-endian two's complement representation. - * @param firstByte the first (least-significant) byte in the FlexUInt representation. - * @param bitsToShiftRight the number of continuation bits that must be shifted out of every byte. - * @param startIndex the index of the second byte in the FlexUInt representation. - * @param length the number of bytes remaining in the FlexUInt representation. - * @return a byte[] (either new or reused) containing the big-endian two's complement representation of the value. 
- */ - private byte[] copyFlexUIntAsTwosComplementBytes(int firstByte, int bitsToShiftRight, long startIndex, int length) { - // If the most significant bit is set, the value would be interpreted as a negative two's complement integer. To - // avoid that, make sure the most significant byte in the copy is 0 by over-allocating the destination buffer. - // Additionally, one more byte than 'length' is always required because 'length' does not include the first - // byte. - byte[] bytes = getScratchForSize(length + 1 + ((buffer[(int) startIndex + length - 1] < 0) ? 1 : 0)); - bytes[0] = 0; - int copyIndex = bytes.length; - bytes[--copyIndex] = (byte) (firstByte >>> bitsToShiftRight); - int lowerBitsMask = ~(-1 << bitsToShiftRight); - for (int i = 0; i < length; i++) { - byte b = buffer[(int) startIndex + i]; - // The following implements a byte-by-byte bit shift. The lower bits in each byte are or'd with the higher - // bits from the previous byte. - bytes[copyIndex] |= (byte) ((b & lowerBitsMask) << (8 - bitsToShiftRight)); - bytes[--copyIndex] = (byte) ((b & SINGLE_BYTE_MASK) >>> bitsToShiftRight); - } - peekIndex = startIndex + length; - return bytes; - } - - /** - * Reads an FlexUInt value into a BigInteger. - * @return the value as a BigInteger. - */ - private BigInteger readFlexUIntAsBigInteger_1_1() { - BigInteger value; - int currentByte = buffer[(int) peekIndex++] & SINGLE_BYTE_MASK; - int length = 0; - while (currentByte == 0) { - // Each byte of continuation bits without a set bit adds 8 to the length of the FlexUInt, but since the - // length includes the continuation byte(s), each empty byte adds a net 7 to the total length. 
- length += 7; - currentByte = buffer[(int) peekIndex++] & SINGLE_BYTE_MASK; - } - int numberOfLengthBits = Integer.numberOfTrailingZeros(currentByte); - length += numberOfLengthBits; - if (length > 0) { - // NOTE: copying into scratch space is required because the encoded bytes, which are unsigned little-endian, - // need to be translated into two's complement big-endian bytes as required by this BigInteger constructor. - // This is expensive, but is cheaper than using arithmetic operations on a BigInteger directly, as this - // would require a new BigInteger to be allocated for each intermediate step. - value = new BigInteger(copyFlexUIntAsTwosComplementBytes(currentByte, numberOfLengthBits + 1, peekIndex, length)); - } else { - value = BigInteger.ZERO; - } - return value; + return readFixedIntOrFixedUIntAsBigInteger_1_1((int) (valueMarker.endIndex - peekIndex)); } /** @@ -615,20 +565,30 @@ private BigInteger readFlexUIntAsBigInteger_1_1() { * @return the value as a BigDecimal. */ private BigDecimal readTimestampFraction_1_1() { - // The fractional seconds are encoded as a (coefficient, scale) pair, + // The fractional seconds are encoded as a (scale, coefficient) pair, // which is similar to a decimal. The primary difference is that the scale represents a negative // exponent because it is illegal for the fractional seconds value to be greater than or equal to 1.0 - // or less than 0.0. The coefficient is encoded as a FlexUInt (instead of FlexInt) to prevent the - // encoding of fractional seconds less than 0.0. The scale is encoded as a FixedUInt (instead of FixedInt) + // or less than 0.0. The coefficient is encoded as a FixedUInt (instead of FixedInt) to prevent the + // encoding of fractional seconds less than 0.0. The scale is encoded as a FlexUInt (instead of FlexInt) // to discourage the encoding of decimal numbers greater than 1.0. 
BigDecimal value; peekIndex = valueMarker.startIndex + L_TIMESTAMP_SECOND_BYTE_LENGTH; - if (buffer[(int) peekIndex] != 0) { + int scale = (int) readFlexUInt_1_1(); + if (peekIndex >= valueMarker.endIndex) { + return BigDecimal.valueOf(0, scale); + } + int length = (int) (valueMarker.endIndex - peekIndex); + // Since the coefficient is stored in a FixedUInt, some 8-byte values cannot fit in a signed 8-byte long. + // Take the quick path for values up to 7 bytes rather than performing additional checks. This should cover + // almost all real-world timestamp fractions. + if (length <= 7) { // No need to allocate a BigInteger to hold the coefficient. - value = BigDecimal.valueOf(readFlexUInt_1_1(), (int) readFixedUInt_1_1(peekIndex, valueMarker.endIndex)); + value = BigDecimal.valueOf(readFixedUInt_1_1(peekIndex, valueMarker.endIndex), scale); } else { // The coefficient may overflow a long, so a BigInteger is required. - value = new BigDecimal(readFlexUIntAsBigInteger_1_1(), (int) readFixedUInt_1_1(peekIndex, valueMarker.endIndex)); + // If the most-significant bit is set, pad the length by one byte so that the value remains unsigned. + length += (buffer[(int) valueMarker.endIndex - 1] < 0) ? 
1 : 0; + value = new BigDecimal(readFixedIntOrFixedUIntAsBigInteger_1_1(length), scale); } if (BigDecimal.ONE.compareTo(value) < 1) { throw new IllegalArgumentException(String.format("Fractional seconds %s must be greater than or equal to 0 and less than 1", value)); diff --git a/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java b/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java index 80cb429e66..de9c2f6c40 100644 --- a/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java +++ b/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java @@ -3919,7 +3919,7 @@ public void readTimestampValueWithKnownOffsetShortForm(@ConvertWith(StringToTime @ParameterizedTest @CsvSource({ - // OpCode Length YYYYYYYY MMYYYYYY HDDDDDMM mmmmHHHH oooooomm ssoooooo ....ssss Coefficient+ Scale + // OpCode Length YYYYYYYY MMYYYYYY HDDDDDMM mmmmHHHH oooooomm ssoooooo ....ssss Scale+ Coefficient "0001T, 11110111 00000101 00000001 00000000", "1947T, 11110111 00000101 10011011 00000111", "9999T, 11110111 00000101 00001111 00100111", @@ -3932,37 +3932,43 @@ public void readTimestampValueWithKnownOffsetShortForm(@ConvertWith(StringToTime "1947-12-23T23:59Z, 11110111 00001101 10011011 00000111 11011111 10111011 10000011 00010110", "1947-12-23T23:59:00Z, 11110111 00001111 10011011 00000111 11011111 10111011 10000011 00010110 00000000", "1947-12-23T23:59:59Z, 11110111 00001111 10011011 00000111 11011111 10111011 10000011 11010110 00001110", - "1947-12-23T23:59:00.0Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000001", - "1947-12-23T23:59:00.00Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000010", - "1947-12-23T23:59:00.000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000011", - "1947-12-23T23:59:00.0000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 
00000100", - "1947-12-23T23:59:00.00000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000101", - "1947-12-23T23:59:00.000000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000110", - "1947-12-23T23:59:00.0000000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000111", - "1947-12-23T23:59:00.00000000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00001000", - "1947-12-23T23:59:00.9Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00010011 00000001", - "1947-12-23T23:59:00.99Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11000111 00000010", - "1947-12-23T23:59:00.999Z, 11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 10011110 00001111 00000011", - "1947-12-23T23:59:00.9999Z, 11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00111110 10011100 00000100", - "1947-12-23T23:59:00.99999Z, 11110111 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111100 00110100 00001100 00000101", - "1947-12-23T23:59:00.999999Z, 11110111 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111100 00010001 01111010 00000110", - "1947-12-23T23:59:00.9999999Z, 11110111 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111000 01100111 10001001 00001001 00000111", - "1947-12-23T23:59:00.99999999Z, 11110111 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111000 00001111 01011110 01011111 00001000", + "1947-12-23T23:59:00.0Z, 11110111 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000011", + "1947-12-23T23:59:00.00Z, 11110111 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000101", + "1947-12-23T23:59:00.000Z, 11110111 00010001 10011011 
00000111 11011111 10111011 10000011 00010110 00000000 00000111", + "1947-12-23T23:59:00.0000Z, 11110111 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001001", + "1947-12-23T23:59:00.00000Z, 11110111 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001011", + "1947-12-23T23:59:00.000000Z, 11110111 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001101", + "1947-12-23T23:59:00.0000000Z, 11110111 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001111", + "1947-12-23T23:59:00.00000000Z, 11110111 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00010001", + "1947-12-23T23:59:00.9Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000011 00001001", + "1947-12-23T23:59:00.99Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000101 01100011", + "1947-12-23T23:59:00.999Z, 11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000111 11100111 00000011", + "1947-12-23T23:59:00.9999Z, 11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001001 00001111 00100111", + "1947-12-23T23:59:00.99999Z, 11110111 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001011 10011111 10000110 00000001", + "1947-12-23T23:59:00.999999Z, 11110111 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001101 00111111 01000010 00001111", + "1947-12-23T23:59:00.9999999Z, 11110111 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001111 01111111 10010110 10011000 00000000", + "1947-12-23T23:59:00.99999999Z, 11110111 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00010001 11111111 11100000 11110101 00000101", + + "1947-12-23T23:59:00.9223372036854775807Z, " + // Long.MAX_VALUE + "11110111 00100001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 
00100111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 01111111", + + "1947-12-23T23:59:00.9223372036854775808Z, " + // Long.MAX_VALUE + 1 (unsigned) + "11110111 00100001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00100111 00000000 00000000 00000000 00000000 00000000 00000000 00000000 10000000", "1947-12-23T23:59:00.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000Z, " + - "11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 10001101", + "11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00110110 00000010", "1947-12-23T23:59:00.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000Z, " + - "11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 01101000 00000001", + "11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 10100010 00000101", "1947-12-23T23:59:00.999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999Z, " + - "11110111 10010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 " + - "11111100 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 " + - "11111111 10010100 10001001 01111001 01101100 11001110 01111000 11110010 01000000 01111101 10100110 11000111 10101000 01000110 01011001 01110001 01001101 " + - "00100000 11110101 
01101110 01111010 00001100 00001001 11101111 01111111 11110011 00011110 00010100 11010111 01101000 01110111 10101100 01101100 10001110 " + - "00110010 10110111 10000010 11110010 00110110 01101000 11110010 10100111 10001101", + "11110111 10001001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00110110 00000010 11111111 11111111 11111111 11111111 11111111 11111111 " + + "11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 10011111 00110010 00110001 10001111 11001101 00011001 " + + "01001111 00011110 10101000 11001111 11110100 00011000 11010101 00101000 00101011 10101110 00001001 10100100 11011110 01001101 10001111 00100001 11100001 " + + "11111101 01101111 11011110 10000011 11100010 00011010 11101101 10001110 10010101 11001101 01010001 11100110 01010110 01010000 11011110 00000110 01001101 " + + "11111110 00010100", // Offsets "2048-01-01T01:01-23:59, 11110111 00001101 00000000 01001000 10000100 00010000 00000100 00000000", From 7b9305ef8252bcb0d3ceaff547940440dbdbaf2e Mon Sep 17 00:00:00 2001 From: Tyler Gregg Date: Fri, 17 Nov 2023 17:46:03 -0800 Subject: [PATCH 5/5] Adds support for reading binary normalized half-precision floats; adds more tests for numeric types. --- src/com/amazon/ion/impl/IonCursorBinary.java | 10 ++-- .../impl/IonReaderContinuableCoreBinary.java | 54 ++++++++++++++++++- ...onReaderContinuableTopLevelBinaryTest.java | 49 +++++++++++++++++ 3 files changed, 105 insertions(+), 8 deletions(-) diff --git a/src/com/amazon/ion/impl/IonCursorBinary.java b/src/com/amazon/ion/impl/IonCursorBinary.java index 2224d2bc6f..0948114cbb 100644 --- a/src/com/amazon/ion/impl/IonCursorBinary.java +++ b/src/com/amazon/ion/impl/IonCursorBinary.java @@ -948,7 +948,10 @@ private long uncheckedReadFlexUInt_1_1() { */ private long slowReadFlexUInt_1_1() { // TODO perf: try 1-byte special case checks. 
Least-significant bits of 1 indicate 1-byte - int currentByte = slowPeekByte(); + int currentByte = slowReadByte(); + if (currentByte < 0) { + return -1; + } if (currentByte == 0) { throw new IonException("Found a VarUInt that was too large to fit in a `long`"); } @@ -1345,11 +1348,6 @@ private boolean slowReadValueHeader(IonTypeID valueTid, boolean isAnnotated, Mar if (valueTid.isDelimited) { endIndex = DELIMITED_MARKER; } else if (valueTid.variableLength) { - // At this point the value must be at least 2 more bytes: 1 for the smallest-possible value length - // and 1 for the smallest-possible value representation. - if (!fillAt(peekIndex, 2)) { - return true; - } valueLength = minorVersion == 0 ? slowReadVarUInt_1_0() : slowReadFlexUInt_1_1(); if (valueLength < 0) { return true; diff --git a/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java b/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java index 471ead7e13..1db18d1e4d 100644 --- a/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java +++ b/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java @@ -33,6 +33,7 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade private static final int LOWER_SEVEN_BITS_BITMASK = 0x7F; private static final int SINGLE_BYTE_MASK = 0xFF; + private static final int TWO_BYTE_MASK = 0xFFFF; // Isolates the lowest six bits in a byte. private static final int LOWER_SIX_BITS_BITMASK = 0x3F; @@ -64,7 +65,8 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade // The second-most significant bit in the most significant byte of a VarInt is the sign. private static final int VAR_INT_SIGN_BITMASK = 0x40; - // 32-bit floats must declare length 4. + private static final int FLOAT_16_BYTE_LENGTH = 2; + private static final int FLOAT_32_BYTE_LENGTH = 4; // Initial capacity of the ArrayList used to hold the symbol IDs of the annotations on the current value. 
@@ -993,6 +995,49 @@ public int intValue() {
         return (int) longValue();
     }
 
+    // IEEE-754 half-precision (s=sign, e=exponent, f=fraction): seee_eeff_ffff_ffff
+    private static final int FLOAT_16_SIGN_MASK = 0b1000_0000_0000_0000;
+    private static final int FLOAT_16_EXPONENT_MASK = 0b0111_1100_0000_0000;
+    private static final int FLOAT_16_FRACTION_MASK = 0b0000_0011_1111_1111;
+
+    // float64 bias: 1023; float16 bias: 15. Shift left to align with the masked exponent bits.
+    private static final int FLOAT_16_TO_64_EXPONENT_BIAS_CONVERSION = (1023 - 15) << Integer.numberOfTrailingZeros(FLOAT_16_EXPONENT_MASK);
+    // The float16 sign bit has bit index 15; the float64 sign bit has bit index 63.
+    private static final int FLOAT_16_TO_64_SIGN_SHIFT = 63 - 15;
+    // The 5 float16 exponent bits start at index 10; the 11 float64 exponent bits start at index 52.
+    private static final int FLOAT_16_TO_64_EXPONENT_SHIFT = 52 - 10;
+    // The most significant float16 fraction bit is at index 9; the most significant float64 fraction bit is at index 51.
+    private static final int FLOAT_16_TO_64_FRACTION_SHIFT = 51 - 9;
+
+    /**
+     * Reads the next two bytes from the given ByteBuffer as a 16-bit float, returning the value as a Java double.
+     * @param byteBuffer a buffer positioned at the first byte of the 16-bit float; denormalized encodings are rejected with UnsupportedOperationException.
+     * @return the value, preserving the sign of zero and of infinity; any all-ones exponent with a nonzero fraction yields Double.NaN.
+     */
+    private static double readFloat16(ByteBuffer byteBuffer) {
+        int bits = byteBuffer.getShort() & TWO_BYTE_MASK;
+        int sign = bits & FLOAT_16_SIGN_MASK;
+        int exponent = bits & FLOAT_16_EXPONENT_MASK;
+        int fraction = bits & FLOAT_16_FRACTION_MASK;
+        if (exponent == 0) {
+            if (fraction == 0) {
+                return sign == 0 ? 0e0 : -0e0; // A clear sign bit denotes positive zero; a set sign bit denotes negative zero.
+            }
+            // Denormalized: zero exponent with a nonzero fraction. Fail loudly rather than return a wrong value.
+            throw new UnsupportedOperationException("Support for denormalized half-precision floats not yet added.");
+        } else if ((exponent ^ FLOAT_16_EXPONENT_MASK) == 0) { // All exponent bits set: infinity or NaN.
+            if (fraction == 0) {
+                return sign == 0 ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
+            }
+            return Double.NaN;
+        }
+        return Double.longBitsToDouble(
+            ((long) sign << FLOAT_16_TO_64_SIGN_SHIFT)
+            | ((long) (exponent + FLOAT_16_TO_64_EXPONENT_BIAS_CONVERSION) << FLOAT_16_TO_64_EXPONENT_SHIFT)
+            | ((long) fraction << FLOAT_16_TO_64_FRACTION_SHIFT)
+        );
+    }
+
     @Override
     public double doubleValue() {
         double value;
@@ -1003,7 +1048,12 @@ public double doubleValue() {
                 return 0.0d;
             }
             ByteBuffer bytes = prepareByteBuffer(valueMarker.startIndex, valueMarker.endIndex);
-            if (length == FLOAT_32_BYTE_LENGTH) {
+            if (length == FLOAT_16_BYTE_LENGTH) {
+                if (minorVersion == 0) {
+                    throw new IonException("Ion 1.0 floats may only have length 0, 4, or 8.");
+                }
+                value = readFloat16(bytes);
+            } else if (length == FLOAT_32_BYTE_LENGTH) {
                 value = bytes.getFloat();
             } else {
                 // Note: there is no need to check for other lengths here; the type ID byte is validated during next().
diff --git a/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java b/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java
index de9c2f6c40..40d1fe8461 100644
--- a/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java
+++ b/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java
@@ -3690,6 +3690,9 @@ private void assertLongCorrectlyParsed(boolean constructFromBytes, long expected
         " 1, 51 01",
         " 17, 51 11",
         " 127, 51 7F",
+        " 127, 52 7F 00",
+        " 127, 54 7F 00 00 00",
+        " 127, 58 7F 00 00 00 00 00 00 00",
         " 128, 52 80 00",
         " 5555, 52 B3 15",
         " 32767, 52 FF 7F",
@@ -3721,6 +3724,30 @@ public void readLongValue(long expectedValue, String inputBytes) throws Exceptio
         assertLongCorrectlyParsed(false, expectedValue, inputBytes);
     }
 
+    @ParameterizedTest
+    @CsvSource({
+        " 0, F5 01",
+        " 1, F5 03 01",
+        " 17, F5 03 11",
+        " 127, F5 03 7F",
+        " 128, F5 05 80 00",
+        " 2147483647, F5 09 FF FF FF 7F", // Integer.MAX_VALUE
+        " 72624976668147840, F5 11 80 40 20 10 08 04 02 01",
+        " 9223372036854775807, F5 11 FF FF FF FF FF FF FF 7F", // Long.MAX_VALUE
+        " -1, F5 03 FF",
+        " -2, F5 03 FE",
+        " -14, F5 03 F2",
+        " -128, F5 03 80",
+        " -129, F5 05 7F FF",
+        " -2147483648, F5 09 00 00 00 80", // Integer.MIN_VALUE
+        " -72624976668147841, F5 11 7F BF DF EF F7 FB FD FE",
+        " -9223372036854775808, F5 11 00 00 00 00 00 00 00 80", // Long.MIN_VALUE
+    })
+    public void readLongValueFromVariableLengthEncoding(long expectedValue, String inputBytes) throws Exception {
+        assertLongCorrectlyParsed(true, expectedValue, inputBytes);
+        assertLongCorrectlyParsed(false, expectedValue, inputBytes);
+    }
+
     /**
      * Checks that the reader reads the expected BigInteger from the given input bits.
      */
@@ -3739,6 +3766,9 @@ private void assertBigIntegerCorrectlyParsed(boolean constructFromBytes, BigInte
         " 1, 51 01",
         " 17, 51 11",
         " 127, 51 7F",
+        " 127, 52 7F 00",
+        " 127, 54 7F 00 00 00",
+        " 127, 58 7F 00 00 00 00 00 00 00",
         " 128, 52 80 00",
         " 5555, 52 B3 15",
         " 32767, 52 FF 7F",
@@ -3789,25 +3819,44 @@ private void assertDoubleCorrectlyParsed(boolean constructFromBytes, double expe
     @ParameterizedTest
     @CsvSource({
         " 0.0, 5A",
+        " 0.0, 5B 00 00",
+        " 0.0, 5C 00 00 00 00",
+        " 0.0, 5D 00 00 00 00 00 00 00 00",
+        " -0.0, 5B 80 00",
+        " -0.0, 5C 80 00 00 00",
+        " -0.0, 5D 80 00 00 00 00 00 00 00",
+        " 1.0, 5B 3C 00",
         " 1.0, 5C 3F 80 00 00",
+        " 1.0, 5D 3F F0 00 00 00 00 00 00",
         " 1.5, 5C 3F C0 00 00",
+        " 0.00006103515625, 5B 04 00", // Smallest positive normal half-precision float
+        " 0.333251953125, 5B 35 55", // Nearest half-precision representation of one third
         " 3.141592653589793, 5D 40 09 21 FB 54 44 2D 18",
         " 4.00537109375, 5C 40 80 2C 00",
         " 4.11111111111, 5D 40 10 71 C7 1C 71 C2 39",
+        " 65504, 5B 7B FF", // Largest normal half-precision float
         " 423542.09375, 5C 48 CE CE C3",
         " 8236423542.09375, 5D 41 FE AE DD 97 61 80 00",
         " 1.79769313486231570e+308, 5D 7F EF FF FF FF FF FF FF", // Double.MAX_VALUE
         " -1.0, 5C BF 80 00 00",
         " -1.5, 5C BF C0 00 00",
+        " -2, 5B C0 00",
         " -3.141592653589793, 5D C0 09 21 FB 54 44 2D 18",
         " -4.00537109375, 5C C0 80 2C 00",
         " -4.11111111111, 5D C0 10 71 C7 1C 71 C2 39",
+        " -65504, 5B FB FF", // Most negative normal half-precision float
         " -423542.09375, 5C C8 CE CE C3",
         " -8236423542.09375, 5D C1 FE AE DD 97 61 80 00",
         "-1.79769313486231570e+308, 5D FF EF FF FF FF FF FF FF", // Double.MIN_VALUE
+        " NaN, 5B 7C 01",
+        " Infinity, 5B 7C 00",
+        " -Infinity, 5B FC 00",
         " NaN, 5C 7F C0 00 00",
         " Infinity, 5C 7F 80 00 00",
         " -Infinity, 5C FF 80 00 00",
+        " NaN, 5D 7F F0 00 00 00 00 00 01",
+        " Infinity, 5D 7F F0 00 00 00 00 00 00",
+        " -Infinity, 5D FF F0 00 00 00 00 00 00",
     })
     public void readDoubleValue(double expectedValue, String inputBytes) throws Exception {
         assertDoubleCorrectlyParsed(true, expectedValue, inputBytes);