amazon-ion · tgregg · Nov 20, 2023 · Nov 16, 2023 · Nov 16, 2023 · Nov 17, 2023
diff --git a/src/com/amazon/ion/impl/IonCursorBinary.java b/src/com/amazon/ion/impl/IonCursorBinary.java
@@ -948,7 +948,10 @@ private long uncheckedReadFlexUInt_1_1() {
      */
     private long slowReadFlexUInt_1_1() {
         // TODO perf: try 1-byte special case checks. Least-significant bits of 1 indicate 1-byte
-        int currentByte = slowPeekByte();
+        int currentByte = slowReadByte();
+        if (currentByte < 0) {
+            return -1;
+        }
         if (currentByte == 0) {
             throw new IonException("Found a VarUInt that was too large to fit in a `long`");
         }
@@ -1262,6 +1265,9 @@ private boolean uncheckedReadHeader(final int typeIdByte, final boolean isAnnota
             if (endIndex > limit) {
                 isValueIncomplete = true;
             }
+            if (minorVersion == 1 && valueTid.isNull && valueTid.length > 0) {
+                valueTid = IonTypeID.NULL_TYPE_IDS_1_1[buffer[(int) (peekIndex++)] & SINGLE_BYTE_MASK];
+            }
         }
         markerToSet.typeId = valueTid;
         if (event == Event.START_CONTAINER) {
@@ -1306,6 +1312,13 @@ private boolean slowReadHeader(final int typeIdByte, final boolean isAnnotated,
             }
             return true;
         }
+        if (minorVersion == 1 && valueTid.isNull && valueTid.length > 0) {
+            int nullTypeIndex = slowReadByte();
+            if (nullTypeIndex < 0) {
+                return true;
+            }
+            valueTid = IonTypeID.NULL_TYPE_IDS_1_1[nullTypeIndex];
+        }
         markerToSet.typeId = valueTid;
         if (checkpointLocation == CheckpointLocation.AFTER_SCALAR_HEADER) {
             return true;
@@ -1335,11 +1348,6 @@ private boolean slowReadValueHeader(IonTypeID valueTid, boolean isAnnotated, Mar
         if (valueTid.isDelimited) {
             endIndex = DELIMITED_MARKER;
         } else if (valueTid.variableLength) {
-            // At this point the value must be at least 2 more bytes: 1 for the smallest-possible value length
-            // and 1 for the smallest-possible value representation.
-            if (!fillAt(peekIndex, 2)) {
-                return true;
-            }
             valueLength = minorVersion == 0 ? slowReadVarUInt_1_0() : slowReadFlexUInt_1_1();
             if (valueLength < 0) {
                 return true;

diff --git a/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java b/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java
@@ -33,6 +33,7 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade
     private static final int LOWER_SEVEN_BITS_BITMASK = 0x7F;
 
     private static final int SINGLE_BYTE_MASK = 0xFF;
+    private static final int TWO_BYTE_MASK = 0xFFFF;
 
     // Isolates the lowest six bits in a byte.
     private static final int LOWER_SIX_BITS_BITMASK = 0x3F;
@@ -64,7 +65,8 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade
     // The second-most significant bit in the most significant byte of a VarInt is the sign.
     private static final int VAR_INT_SIGN_BITMASK = 0x40;
 
-    // 32-bit floats must declare length 4.
+    private static final int FLOAT_16_BYTE_LENGTH = 2;
+
     private static final int FLOAT_32_BYTE_LENGTH = 4;
 
     // Initial capacity of the ArrayList used to hold the symbol IDs of the annotations on the current value.
@@ -376,8 +378,8 @@ private Timestamp readTimestamp_1_0() {
             offset = readVarInt_1_0(firstByte);
         }
         int year = readVarUInt_1_0();
-        int month = 0;
-        int day = 0;
+        int month = 1;
+        int day = 1;
         int hour = 0;
         int minute = 0;
         int second = 0;
@@ -475,98 +477,120 @@ private int readVarSym_1_1(Marker marker) {
         throw new UnsupportedOperationException();
     }
 
-    private BigDecimal readBigDecimal_1_1() {
-        throw new UnsupportedOperationException();
+    /**
+     * Reads the little-endian FixedInt spanning `peekIndex` to `valueMarker.endIndex` into a long. After this method
+     * returns, `peekIndex` points to the first byte after the end of the FixedInt.
+     * @return the value, or 0 if there are no bytes between `peekIndex` and `valueMarker.endIndex`.
+     */
+    private long readFixedInt_1_1() {
+        if (peekIndex >= valueMarker.endIndex) {
+            return 0;
+        }
+        long startIndex = peekIndex;
+        peekIndex = valueMarker.endIndex;
+        // Note: the most significant byte is deliberately not masked with 0xFF so that widening it to long sign-extends.
+        long value = buffer[(int) --peekIndex];
+        while (peekIndex > startIndex) {
+            value = (value << 8) | (buffer[(int) --peekIndex] & SINGLE_BYTE_MASK);
+        }
+        peekIndex = valueMarker.endIndex;
+        return value;
     }
 
-    private Decimal readDecimal_1_1() {
-        throw new UnsupportedOperationException();
+    /**
+     * Copies a FixedInt or FixedUInt into scratch space, converting it to its equivalent big-endian two's complement
+     * representation. If the provided length is longer than the actual length of the value, the most significant
+     * byte in the two's complement representation will be zero.
+     * @param startIndex the index of the first (least-significant) byte in the FixedInt or FixedUInt representation.
+     * @param length the size of the scratch space to request: the encoded length, plus one if padding is needed.
+     * @return a byte[] (either new or reused) containing the big-endian two's complement representation of the value.
+     */
+    private byte[] copyFixedIntOrFixedUIntAsTwosComplementBytes(long startIndex, int length) {
+        // FixedInt is a little-endian two's complement representation. Simply reverse the bytes.
+        byte[] bytes = getScratchForSize(length);
+        // Clear the most significant byte in case the scratch space is padded to accommodate an unsigned value with
+        // its highest bit set.
+        bytes[0] = 0;
+        int copyIndex = bytes.length;
+        for (long i = startIndex; i < valueMarker.endIndex; i++) {
+            bytes[--copyIndex] = buffer[(int) i];
+        }
+        peekIndex = valueMarker.endIndex;
+        return bytes;
     }
 
-    private long readLong_1_1() {
+    /**
+     * Reads a FixedInt or FixedUInt value into a BigInteger.
+     * @param length the length of the two's complement representation of the value. For FixedInts, this is always
+     *               equal to the length of the value; for FixedUInts, this is one byte larger than the length of the
+     *               value if the highest bit in the unsigned representation is set.
+     * @return the value.
+     */
+     private BigInteger readFixedIntOrFixedUIntAsBigInteger_1_1(int length) {
+         BigInteger value;
+         if (length > 0) {
+             value = new BigInteger(copyFixedIntOrFixedUIntAsTwosComplementBytes(peekIndex, length));
+         } else {
+             value = BigInteger.ZERO;
+         }
+         return value;
+     }
+
+    private BigDecimal readBigDecimal_1_1() {
         throw new UnsupportedOperationException();
     }
 
-    private BigInteger readBigInteger_1_1() {
+    private Decimal readDecimal_1_1() {
         throw new UnsupportedOperationException();
     }
 
     /**
-     * Copies a FlexUInt into scratch space, converting it to its equivalent big-endian two's complement representation.
-     * @param firstByte the first (least-significant) byte in the FlexUInt representation.
-     * @param bitsToShiftRight the number of continuation bits that must be shifted out of every byte.
-     * @param startIndex the index of the second byte in the FlexUInt representation.
-     * @param length the number of bytes remaining in the FlexUInt representation.
-     * @return a byte[] (either new or reused) containing the big-endian two's complement representation of the value.
+     * Reads the FixedInt bounded by `valueMarker` into a `long`.
+     * @return the value.
      */
-    private byte[] copyFlexUIntAsTwosComplementBytes(int firstByte, int bitsToShiftRight, long startIndex, int length) {
-        // If the most significant bit is set, the value would be interpreted as a negative two's complement integer. To
-        // avoid that, make sure the most significant byte in the copy is 0 by over-allocating the destination buffer.
-        // Additionally, one more byte than 'length' is always required because 'length' does not include the first
-        // byte.
-        byte[] bytes = getScratchForSize(length + 1 + ((buffer[(int) startIndex + length - 1] < 0) ? 1 : 0));
-        bytes[0] = 0;
-        int copyIndex = bytes.length;
-        bytes[--copyIndex] = (byte) (firstByte >>> bitsToShiftRight);
-        int lowerBitsMask = ~(-1 << bitsToShiftRight);
-        for (int i = 0; i < length; i++) {
-            byte b = buffer[(int) startIndex + i];
-            // The following implements a byte-by-byte bit shift. The lower bits in each byte are or'd with the higher
-            // bits from the previous byte.
-            bytes[copyIndex] |= (byte) ((b & lowerBitsMask) << (8 - bitsToShiftRight));
-            bytes[--copyIndex] = (byte) ((b & SINGLE_BYTE_MASK) >>> bitsToShiftRight);
-        }
-        peekIndex = startIndex + length;
-        return bytes;
+    private long readLong_1_1() {
+        peekIndex = valueMarker.startIndex;
+        return readFixedInt_1_1();
     }
 
     /**
-     * Reads an FlexUInt value into a BigInteger.
-     * @return the value as a BigInteger.
+     * Reads the FixedInt bounded by `valueMarker` into a BigInteger.
+     * @return the value.
      */
-    private BigInteger readFlexUIntAsBigInteger_1_1() {
-        BigInteger value;
-        int currentByte = buffer[(int) peekIndex++] & SINGLE_BYTE_MASK;
-        int length = 0;
-        while (currentByte == 0) {
-            // Each byte of continuation bits without a set bit adds 8 to the length of the FlexUInt, but since the
-            // length includes the continuation byte(s), each empty byte adds a net 7 to the total length.
-            length += 7;
-            currentByte = buffer[(int) peekIndex++] & SINGLE_BYTE_MASK;
-        }
-        int numberOfLengthBits = Integer.numberOfTrailingZeros(currentByte);
-        length += numberOfLengthBits;
-        if (length > 0) {
-            // NOTE: copying into scratch space is required because the encoded bytes, which are unsigned little-endian,
-            // need to be translated into two's complement big-endian bytes as required by this BigInteger constructor.
-            // This is expensive, but is cheaper than using arithmetic operations on a BigInteger directly, as this
-            // would require a new BigInteger to be allocated for each intermediate step.
-            value = new BigInteger(copyFlexUIntAsTwosComplementBytes(currentByte, numberOfLengthBits + 1, peekIndex, length));
-        } else {
-            value = BigInteger.ZERO;
-        }
-        return value;
+    private BigInteger readBigInteger_1_1() {
+        peekIndex = valueMarker.startIndex;
+        return readFixedIntOrFixedUIntAsBigInteger_1_1((int) (valueMarker.endIndex - peekIndex));
     }
 
     /**
      * Reads the fraction component of an Ion 1.1 long form timestamp.
      * @return the value as a BigDecimal.
      */
     private BigDecimal readTimestampFraction_1_1() {
-        // The fractional seconds are encoded as a (coefficient, scale) pair,
+        // The fractional seconds are encoded as a (scale, coefficient) pair,
         // which is similar to a decimal. The primary difference is that the scale represents a negative
         // exponent because it is illegal for the fractional seconds value to be greater than or equal to 1.0
-        // or less than 0.0. The coefficient is encoded as a FlexUInt (instead of FlexInt) to prevent the
-        // encoding of fractional seconds less than 0.0. The scale is encoded as a FixedUInt (instead of FixedInt)
+        // or less than 0.0. The coefficient is encoded as a FixedUInt (instead of FixedInt) to prevent the
+        // encoding of fractional seconds less than 0.0. The scale is encoded as a FlexUInt (instead of FlexInt)
         // to discourage the encoding of decimal numbers greater than 1.0.
         BigDecimal value;
         peekIndex = valueMarker.startIndex + L_TIMESTAMP_SECOND_BYTE_LENGTH;
-        if (buffer[(int) peekIndex] != 0) {
+        int scale = (int) readFlexUInt_1_1();
+        if (peekIndex >= valueMarker.endIndex) {
+            return BigDecimal.valueOf(0, scale);
+        }
+        int length = (int) (valueMarker.endIndex - peekIndex);
+        // Since the coefficient is stored in a FixedUInt, some 8-byte values cannot fit in a signed 8-byte long.
+        // Take the quick path for values up to 7 bytes rather than performing additional checks. This should cover
+        // almost all real-world timestamp fractions.
+        if (length <= 7) {
             // No need to allocate a BigInteger to hold the coefficient.
-            value = BigDecimal.valueOf(readFlexUInt_1_1(), (int) readFixedUInt_1_1(peekIndex, valueMarker.endIndex));
+            value = BigDecimal.valueOf(readFixedUInt_1_1(peekIndex, valueMarker.endIndex), scale);
         } else {
             // The coefficient may overflow a long, so a BigInteger is required.
-            value = new BigDecimal(readFlexUIntAsBigInteger_1_1(), (int) readFixedUInt_1_1(peekIndex, valueMarker.endIndex));
+            // If the most-significant bit is set, pad the length by one byte so that the value remains unsigned.
+            length += (buffer[(int) valueMarker.endIndex - 1] < 0) ? 1 : 0;
+            value = new BigDecimal(readFixedIntOrFixedUIntAsBigInteger_1_1(length), scale);
         }
         if (BigDecimal.ONE.compareTo(value) < 1) {
             throw new IllegalArgumentException(String.format("Fractional seconds %s must be greater than or equal to 0 and less than 1", value));
@@ -580,8 +604,8 @@ private BigDecimal readTimestampFraction_1_1() {
      */
     private Timestamp readTimestampLongForm_1_1() {
         int year;
-        int month = 0;
-        int day = 0;
+        int month = 1;
+        int day = 1;
         int hour = 0;
         int minute = 0;
         int second = 0;
@@ -651,8 +675,8 @@ private Timestamp readTimestamp_1_1() {
         }
         Timestamp.Precision precision = S_TIMESTAMP_PRECISION_FOR_TYPE_ID_OFFSET[valueTid.lowerNibble];
         int year = 0;
-        int month = 0;
-        int day = 0;
+        int month = 1;
+        int day = 1;
         int hour = 0;
         int minute = 0;
         int second = 0;
@@ -750,8 +774,13 @@ private Timestamp readTimestamp_1_1() {
         }
     }
 
+    /**
+     * Reads the boolean value using the type ID of the current value.
+     * @return the value.
+     */
     private boolean readBoolean_1_1() {
-        throw new UnsupportedOperationException();
+        // Boolean 'true' is 0x5E; 'false' is 0x5F.
+        return valueTid.lowerNibble == 0xE;
     }
 
     @Override
@@ -823,15 +852,16 @@ public IntegerSize getIntegerSize() {
             return null;
         }
         prepareScalar();
-        if (valueTid.length < 0) {
+        int length = valueTid.variableLength ? ((int) (valueMarker.endIndex - valueMarker.startIndex)) : valueTid.length;
+        if (length < 0) {
             return IntegerSize.BIG_INTEGER;
-        } else if (valueTid.length < INT_SIZE_IN_BYTES) {
+        } else if (length < INT_SIZE_IN_BYTES) {
             return IntegerSize.INT;
-        } else if (valueTid.length == INT_SIZE_IN_BYTES) {
+        } else if (length == INT_SIZE_IN_BYTES) {
             return (minorVersion != 0 || classifyInteger_1_0()) ? IntegerSize.INT : IntegerSize.LONG;
-        } else if (valueTid.length < LONG_SIZE_IN_BYTES) {
+        } else if (length < LONG_SIZE_IN_BYTES) {
             return IntegerSize.LONG;
-        } else if (valueTid.length == LONG_SIZE_IN_BYTES) {
+        } else if (length == LONG_SIZE_IN_BYTES) {
             return (minorVersion != 0 || classifyInteger_1_0()) ? IntegerSize.LONG : IntegerSize.BIG_INTEGER;
         }
         return IntegerSize.BIG_INTEGER;
@@ -965,6 +995,49 @@ public int intValue() {
         return (int) longValue();
     }
 
+    // IEEE-754 half-precision (s=sign, e=exponent, f=fraction): seee_eeff_ffff_ffff
+    private static final int FLOAT_16_SIGN_MASK              = 0b1000_0000_0000_0000;
+    private static final int FLOAT_16_EXPONENT_MASK          = 0b0111_1100_0000_0000;
+    private static final int FLOAT_16_FRACTION_MASK          = 0b0000_0011_1111_1111;
+
+    // float64 bias: 1023; float16 bias: 15. Shift left to align with the masked exponent bits.
+    private static final int FLOAT_16_TO_64_EXPONENT_BIAS_CONVERSION = (1023 - 15) << Integer.numberOfTrailingZeros(FLOAT_16_EXPONENT_MASK);
+    // The float16 sign bit has bit index 15; the float64 sign bit has bit index 63.
+    private static final int FLOAT_16_TO_64_SIGN_SHIFT = 63 - 15;
+    // The 5 float16 exponent bits start at index 10; the 11 float64 exponent bits start at index 52.
+    private static final int FLOAT_16_TO_64_EXPONENT_SHIFT = 52 - 10;
+    // The most significant float16 fraction bit is at index 9; the most significant float64 fraction bit is at index 51.
+    private static final int FLOAT_16_TO_64_FRACTION_SHIFT = 51 - 9;
+
+    /**
+     * Reads the next two bytes from the given ByteBuffer as an IEEE-754 half-precision float, widened to a double.
+     * @param byteBuffer a buffer positioned at the first byte of the 16-bit float.
+     * @return the value; signed zeros, infinities, and NaN map to their double equivalents (denormals unsupported).
+     */
+    private static double readFloat16(ByteBuffer byteBuffer) {
+        int bits = byteBuffer.getShort() & TWO_BYTE_MASK;
+        int sign = bits & FLOAT_16_SIGN_MASK;
+        int exponent = bits & FLOAT_16_EXPONENT_MASK;
+        int fraction = bits & FLOAT_16_FRACTION_MASK;
+        if (exponent == 0) {
+            if (fraction == 0) {
+                return sign == 0 ? 0e0 : -0e0;
+            }
+            // Denormalized
+            throw new UnsupportedOperationException("Support for denormalized half-precision floats not yet added.");
+        } else if ((exponent ^ FLOAT_16_EXPONENT_MASK) == 0) {
+            if (fraction == 0) {
+                return sign == 0 ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
+            }
+            return Double.NaN;
+        }
+        return Double.longBitsToDouble(
+              ((long) sign << FLOAT_16_TO_64_SIGN_SHIFT)
+            | ((long) (exponent + FLOAT_16_TO_64_EXPONENT_BIAS_CONVERSION) << FLOAT_16_TO_64_EXPONENT_SHIFT)
+            | ((long) fraction << FLOAT_16_TO_64_FRACTION_SHIFT)
+        );
+    }
+
     @Override
     public double doubleValue() {
         double value;
@@ -975,7 +1048,12 @@ public double doubleValue() {
                 return 0.0d;
             }
             ByteBuffer bytes = prepareByteBuffer(valueMarker.startIndex, valueMarker.endIndex);
-            if (length == FLOAT_32_BYTE_LENGTH) {
+            if (length == FLOAT_16_BYTE_LENGTH) {
+                if (minorVersion == 0) {
+                    throw new IonException("Ion 1.0 floats may only have length 0, 4, or 8.");
+                }
+                value = readFloat16(bytes);
+            } else if (length == FLOAT_32_BYTE_LENGTH) {
                 value = bytes.getFloat();
             } else {
                 // Note: there is no need to check for other lengths here; the type ID byte is validated during next().