Skip to content

Commit

Permalink
Adds support for writing Ion 1.1 text, lobs, and some annotations
Browse files Browse the repository at this point in the history
  • Loading branch information
popematt committed Nov 10, 2023
1 parent 98acd33 commit 2300e2a
Show file tree
Hide file tree
Showing 4 changed files with 322 additions and 2 deletions.
128 changes: 128 additions & 0 deletions src/com/amazon/ion/impl/bin/IonEncoder_1_1.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
package com.amazon.ion.impl.bin;

import com.amazon.ion.Decimal;
import com.amazon.ion.IonText;
import com.amazon.ion.IonType;
import com.amazon.ion.Timestamp;
import com.amazon.ion.impl.bin.utf8.Utf8StringEncoder;
import com.amazon.ion.impl.bin.utf8.Utf8StringEncoderPool;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;

import static com.amazon.ion.impl.bin.Ion_1_1_Constants.*;
import static java.lang.Double.doubleToRawLongBits;
Expand Down Expand Up @@ -428,4 +432,128 @@ static int writeLongFormTimestampValue(WriteBuffer buffer, Timestamp value) {
return 1 + WriteBuffer.flexUIntLength(dataLength) + dataLength;
}

/**
 * Encodes an Ion String into the supplied WriteBuffer using the Ion 1.1 binary format.
 * The shared inline-text routine does the work, including the handling of a {@code null} value.
 *
 * @param buffer the buffer that receives the encoded bytes
 * @param value the text to encode; may be {@code null}
 * @return the number of bytes written
 */
public static int writeStringValue(WriteBuffer buffer, String value) {
    final byte shortFormOpCode = OpCodes.STRING_ZERO_LENGTH;
    final byte longFormOpCode = OpCodes.VARIABLE_LENGTH_STRING;
    return writeInlineText(buffer, value, IonType.STRING, shortFormOpCode, longFormOpCode);
}

/**
 * Encodes an inline (text-carrying) Ion Symbol into the supplied WriteBuffer using the Ion 1.1 binary format.
 * The shared inline-text routine does the work, including the handling of a {@code null} value.
 *
 * @param buffer the buffer that receives the encoded bytes
 * @param value the symbol text to encode; may be {@code null}
 * @return the number of bytes written
 */
public static int writeSymbolValue(WriteBuffer buffer, String value) {
    final byte shortFormOpCode = OpCodes.INLINE_SYMBOL_ZERO_LENGTH;
    final byte longFormOpCode = OpCodes.VARIABLE_LENGTH_INLINE_SYMBOL;
    return writeInlineText(buffer, value, IonType.SYMBOL, shortFormOpCode, longFormOpCode);
}

/**
 * Common encoder for values whose payload is UTF-8 text (strings and inline symbols).
 *
 * A payload of at most 15 bytes has its length folded into the low nibble of {@code zeroLengthOpCode};
 * longer payloads are written as {@code variableLengthOpCode} followed by a FlexUInt length.
 *
 * @param buffer the buffer that receives the encoded bytes
 * @param value the text to encode; {@code null} is written as a null of the given type
 * @param type the Ion type used when writing a null
 * @param zeroLengthOpCode opcode of the zero-length short form; the payload length is OR-ed into it
 * @param variableLengthOpCode opcode of the length-prefixed long form
 * @return the number of bytes written
 */
private static int writeInlineText(WriteBuffer buffer, String value, IonType type, byte zeroLengthOpCode, byte variableLengthOpCode) {
    if (value == null) {
        return writeNullValue(buffer, type);
    }

    // TODO: When merging into the Ion 1.1 raw writer, keep a single instance of the Utf8StringEncoder
    //       instead of fetching one on every call.
    Utf8StringEncoder.Result utf8 = Utf8StringEncoderPool.getInstance().getOrCreate().encode(value);

    final byte[] payload = utf8.getBuffer();
    final int payloadLength = utf8.getEncodedLength();

    // Header = opcode byte, plus the FlexUInt length when the long form is needed.
    final int headerLength;
    if (payloadLength > 0xF) {
        buffer.writeByte(variableLengthOpCode);
        headerLength = 1 + buffer.writeFlexUInt(payloadLength);
    } else {
        buffer.writeByte((byte) (zeroLengthOpCode | payloadLength));
        headerLength = 1;
    }

    // Only the first payloadLength bytes of the encoder's buffer belong to this value.
    buffer.writeBytes(payload, 0, payloadLength);
    return headerLength + payloadLength;
}

/**
 * Encodes the address of an interned Symbol into the supplied WriteBuffer using the Ion 1.1 binary format.
 *
 * Three forms are used, chosen by the magnitude of the address:
 * addresses below {@code FIRST_2_BYTE_SYMBOL_ADDRESS} take one fixed-width byte;
 * addresses below {@code FIRST_MANY_BYTE_SYMBOL_ADDRESS} take two fixed-width bytes holding the address
 * minus {@code FIRST_2_BYTE_SYMBOL_ADDRESS}; everything else is a FlexUInt holding the address minus
 * {@code FIRST_MANY_BYTE_SYMBOL_ADDRESS}.
 *
 * TODO: Do we need to support Symbol Addresses greater than Long.MAX_VALUE?
 *
 * @param buffer the buffer that receives the encoded bytes
 * @param value the symbol address; must not be negative
 * @return the number of bytes written
 * @throws IllegalArgumentException if {@code value} is negative
 */
public static int writeSymbolValue(WriteBuffer buffer, long value) {
    if (value < 0) {
        throw new IllegalArgumentException("Symbol Address cannot be negative; was: " + value);
    }

    if (value < FIRST_2_BYTE_SYMBOL_ADDRESS) {
        buffer.writeByte(OpCodes.SYMBOL_ADDRESS_1_BYTE);
        buffer.writeFixedUInt(value);
        return 2;
    }

    if (value < FIRST_MANY_BYTE_SYMBOL_ADDRESS) {
        // The two-byte form starts counting where the one-byte form ends.
        final long biasedAddress = value - FIRST_2_BYTE_SYMBOL_ADDRESS;
        buffer.writeByte(OpCodes.SYMBOL_ADDRESS_2_BYTES);
        buffer.writeFixedIntOrUInt(biasedAddress, 2);
        return 3;
    }

    // The variable-length form starts counting where the two-byte form ends.
    buffer.writeByte(OpCodes.SYMBOL_ADDRESS_MANY_BYTES);
    return 1 + buffer.writeFlexUInt(value - FIRST_MANY_BYTE_SYMBOL_ADDRESS);
}

/**
 * Encodes an Ion Blob into the supplied WriteBuffer using the Ion 1.1 binary format:
 * the variable-length blob opcode, a FlexUInt byte count, then the raw bytes.
 *
 * @param buffer the buffer that receives the encoded bytes
 * @param value the blob content; {@code null} is written as a null blob
 * @return the number of bytes written
 */
public static int writeBlobValue(WriteBuffer buffer, byte[] value) {
    if (value == null) {
        return writeNullValue(buffer, IonType.BLOB);
    }

    final int payloadLength = value.length;
    buffer.writeByte(OpCodes.VARIABLE_LENGTH_BLOB);
    final int lengthPrefixSize = buffer.writeFlexUInt(payloadLength);
    buffer.writeBytes(value);
    return 1 + lengthPrefixSize + payloadLength;
}

/**
 * Encodes an Ion Clob into the supplied WriteBuffer using the Ion 1.1 binary format:
 * the variable-length clob opcode, a FlexUInt byte count, then the raw bytes.
 *
 * @param buffer the buffer that receives the encoded bytes
 * @param value the clob content; {@code null} is written as a null clob
 * @return the number of bytes written
 */
public static int writeClobValue(WriteBuffer buffer, byte[] value) {
    if (value == null) {
        return writeNullValue(buffer, IonType.CLOB);
    }

    final int payloadLength = value.length;
    buffer.writeByte(OpCodes.VARIABLE_LENGTH_CLOB);
    final int lengthPrefixSize = buffer.writeFlexUInt(payloadLength);
    buffer.writeBytes(value);
    return 1 + lengthPrefixSize + payloadLength;
}

// TODO: Implement FlexSym Annotations

/**
 * Writes an annotation sequence in which every annotation is a symbol address.
 *
 * One and two annotations each have a dedicated opcode followed directly by the FlexUInt addresses.
 * Three or more use a third opcode followed by the total byte length of the addresses (as a FlexUInt)
 * and then the addresses themselves.
 *
 * @param buffer the buffer that receives the encoded bytes
 * @param annotations the symbol addresses; {@code null} or empty writes nothing
 * @return the number of bytes written, which is 0 when there are no annotations
 */
public static int writeAnnotations(WriteBuffer buffer, long[] annotations) {
    final int count = (annotations == null) ? 0 : annotations.length;

    switch (count) {
        case 0:
            return 0;

        case 1: {
            buffer.writeByte(OpCodes.ANNOTATIONS_1_SYMBOL_ADDRESS);
            return 1 + buffer.writeFlexUInt(annotations[0]);
        }

        case 2: {
            buffer.writeByte(OpCodes.ANNOTATIONS_2_SYMBOL_ADDRESS);
            final int firstLength = buffer.writeFlexUInt(annotations[0]);
            final int secondLength = buffer.writeFlexUInt(annotations[1]);
            return 1 + firstLength + secondLength;
        }

        default: {
            // The length prefix precedes the addresses, so their combined size is computed up front.
            int totalAddressBytes = 0;
            for (int i = 0; i < count; i++) {
                totalAddressBytes += WriteBuffer.flexUIntLength(annotations[i]);
            }

            buffer.writeByte(OpCodes.ANNOTATIONS_MANY_SYMBOL_ADDRESS);
            final int lengthPrefixSize = buffer.writeFlexUInt(totalAddressBytes);
            for (int i = 0; i < count; i++) {
                buffer.writeFlexUInt(annotations[i]);
            }
            return 1 + lengthPrefixSize + totalAddressBytes;
        }
    }
}
}
3 changes: 3 additions & 0 deletions src/com/amazon/ion/impl/bin/Ion_1_1_Constants.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
public class Ion_1_1_Constants {
private Ion_1_1_Constants() {}

static final int FIRST_2_BYTE_SYMBOL_ADDRESS = 256;
static final int FIRST_MANY_BYTE_SYMBOL_ADDRESS = 65792;

//////// Timestamp Field Constants ////////

// S_TIMESTAMP_* is applicable to all short-form timestamps
Expand Down
20 changes: 20 additions & 0 deletions src/com/amazon/ion/impl/bin/OpCodes.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,30 @@ private OpCodes() {}
public static final byte TIMESTAMP_NANOS_PRECISION_WITH_OFFSET = 0x7C;
// 0x7D-0x7F Reserved

public static final byte STRING_ZERO_LENGTH = (byte) 0x80;

public static final byte INLINE_SYMBOL_ZERO_LENGTH = (byte) 0x90;

public static final byte SYMBOL_ADDRESS_1_BYTE = (byte) 0xE1;
public static final byte SYMBOL_ADDRESS_2_BYTES = (byte) 0xE2;
public static final byte SYMBOL_ADDRESS_MANY_BYTES = (byte) 0xE3;
public static final byte ANNOTATIONS_1_SYMBOL_ADDRESS = (byte) 0xE4;
public static final byte ANNOTATIONS_2_SYMBOL_ADDRESS = (byte) 0xE5;
public static final byte ANNOTATIONS_MANY_SYMBOL_ADDRESS = (byte) 0xE6;
public static final byte ANNOTATIONS_1_FLEX_SYM = (byte) 0xE7;
public static final byte ANNOTATIONS_2_FLEX_SYM = (byte) 0xE8;
public static final byte ANNOTATIONS_MANY_FLEX_SYM = (byte) 0xE9;
public static final byte NULL_UNTYPED = (byte) 0xEA;
public static final byte NULL_TYPED = (byte) 0xEB;
// 0xEC, 0xED NOP
// 0xEE Reserved
// 0xEF System Macro Invocation

public static final byte VARIABLE_LENGTH_INTEGER = (byte) 0xF5;
public static final byte VARIABLE_LENGTH_DECIMAL = (byte) 0xF6;
public static final byte VARIABLE_LENGTH_TIMESTAMP = (byte) 0xF7;
public static final byte VARIABLE_LENGTH_STRING = (byte) 0xF8;
public static final byte VARIABLE_LENGTH_INLINE_SYMBOL = (byte) 0xF9;
public static final byte VARIABLE_LENGTH_BLOB = (byte) 0xFE;
public static final byte VARIABLE_LENGTH_CLOB = (byte) 0xFF;
}
173 changes: 171 additions & 2 deletions test/com/amazon/ion/impl/bin/IonEncoder_1_1Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;
import java.util.function.BiFunction;

public class IonEncoder_1_1Test {
Expand Down Expand Up @@ -455,6 +457,127 @@ public void testWriteTimestampValueForNullTimestamp() {
Assertions.assertEquals(2, numBytes);
}

/**
 * Checks the bytes produced by {@code IonEncoder_1_1.writeStringValue} for non-null strings.
 * Each CSV row is: the input string, then the expected output as space-separated hex octets.
 * The comparison itself is delegated to {@code assertWritingValue}.
 */
@ParameterizedTest
@CsvSource({
// Short form: the payload length is carried in the low nibble of the 0x8_ opcode.
"'', 80",
"'a', 81 61",
"'ab', 82 61 62",
"'abc', 83 61 62 63",
"'fourteen bytes', 8E 66 6F 75 72 74 65 65 6E 20 62 79 74 65 73",
// Long form: opcode F8, then a FlexUInt length, then the UTF-8 bytes.
"'this has sixteen', F8 21 74 68 69 73 20 68 61 73 20 73 69 78 74 65 65 6E",
"'variable length encoding', F8 31 76 61 72 69 61 62 6C 65 20 6C 65 6E 67 74 68 20 65 6E 63 6F 64 69 6E 67",
})
public void testWriteStringValue(String value, String expectedBytes) {
assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeStringValue);
}

/**
 * A null String must be written as the two bytes {@code EB 05}, and the encoder must report 2 bytes written.
 */
@Test
public void testWriteStringValueForNull() {
    final int bytesWritten = IonEncoder_1_1.writeStringValue(buf, null);
    final String writtenHex = byteArrayToHex(bytes());
    Assertions.assertEquals("EB 05", writtenHex);
    Assertions.assertEquals(2, bytesWritten);
}

/**
 * Checks the bytes produced by {@code IonEncoder_1_1.writeSymbolValue(WriteBuffer, String)} for
 * non-null inline symbol text.
 * Each CSV row is: the symbol text, then the expected output as space-separated hex octets.
 * The comparison itself is delegated to {@code assertWritingValue}.
 */
@ParameterizedTest
@CsvSource({
// Short form: the payload length is carried in the low nibble of the 0x9_ opcode.
"'', 90",
"'a', 91 61",
"'ab', 92 61 62",
"'abc', 93 61 62 63",
"'fourteen bytes', 9E 66 6F 75 72 74 65 65 6E 20 62 79 74 65 73",
// Long form: opcode F9, then a FlexUInt length, then the UTF-8 bytes.
"'this has sixteen', F9 21 74 68 69 73 20 68 61 73 20 73 69 78 74 65 65 6E",
"'variable length encoding', F9 31 76 61 72 69 61 62 6C 65 20 6C 65 6E 67 74 68 20 65 6E 63 6F 64 69 6E 67",
})
public void testWriteSymbolValue(String value, String expectedBytes) {
assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeSymbolValue);
}

/**
 * Checks the bytes produced by {@code IonEncoder_1_1.writeSymbolValue(WriteBuffer, long)} for symbol addresses.
 * Each CSV row is: the symbol address, then the expected output as space-separated hex octets.
 * The rows sit on both sides of the boundaries between the three encodings (256 and 65792) and
 * finish with {@code Long.MAX_VALUE}.
 */
@ParameterizedTest
@CsvSource({
// One-byte form (opcode E1): the address is written as-is.
"0, E1 00",
"1, E1 01",
"255, E1 FF",
// Two-byte form (opcode E2): the encoder subtracts 256 before writing two bytes.
"256, E2 00 00",
"257, E2 01 00",
"512, E2 00 01",
"513, E2 01 01",
"65535, E2 FF FE",
"65791, E2 FF FF",
// Variable-length form (opcode E3): the encoder subtracts 65792 and writes a FlexUInt.
"65792, E3 01",
"65793, E3 03",
"65919, E3 FF",
"65920, E3 02 02",
"9223372036854775807, E3 00 FF FD FD FF FF FF FF FF"
})
public void testWriteSymbolValue(long value, String expectedBytes) {
assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeSymbolValue);
}

/**
 * A null symbol text must be written as the two bytes {@code EB 06}, and the encoder must report 2 bytes written.
 */
@Test
public void testWriteSymbolValueForNull() {
    final int bytesWritten = IonEncoder_1_1.writeSymbolValue(buf, null);
    final String writtenHex = byteArrayToHex(bytes());
    Assertions.assertEquals("EB 06", writtenHex);
    Assertions.assertEquals(2, bytesWritten);
}

/**
 * Checks the bytes produced by {@code IonEncoder_1_1.writeBlobValue} for non-null blobs.
 * Each CSV row is: the blob content as hex octets (turned into a {@code byte[]} by
 * {@code HexStringToByteArray}), then the expected output as hex octets.
 * Every expected output is opcode FE, a FlexUInt length, then the content unchanged.
 */
@ParameterizedTest
@CsvSource({
"'', FE 01", // empty blob: the opcode and a one-byte length, with no content bytes
"20, FE 03 20",
"49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79, " +
"FE 31 49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79"
})
public void testWriteBlobValue(@ConvertWith(HexStringToByteArray.class) byte[] value, String expectedBytes) {
assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeBlobValue);
}

/**
 * A null blob must be written as the two bytes {@code EB 07}, and the encoder must report 2 bytes written.
 */
@Test
public void testWriteBlobValueForNull() {
    final int bytesWritten = IonEncoder_1_1.writeBlobValue(buf, null);
    final String writtenHex = byteArrayToHex(bytes());
    Assertions.assertEquals("EB 07", writtenHex);
    Assertions.assertEquals(2, bytesWritten);
}

/**
 * Checks the bytes produced by {@code IonEncoder_1_1.writeClobValue} for non-null clobs.
 * Each CSV row is: the clob content as hex octets (turned into a {@code byte[]} by
 * {@code HexStringToByteArray}), then the expected output as hex octets.
 * Every expected output is opcode FF, a FlexUInt length, then the content unchanged.
 */
@ParameterizedTest
@CsvSource({
"'', FF 01", // empty clob: the opcode and a one-byte length, with no content bytes
"20, FF 03 20",
"49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79, " +
"FF 31 49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79"
})
public void testWriteClobValue(@ConvertWith(HexStringToByteArray.class) byte[] value, String expectedBytes) {
assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeClobValue);
}

/**
 * A null clob must be written as the two bytes {@code EB 08}, and the encoder must report 2 bytes written.
 */
@Test
public void testWriteClobValueForNull() {
    final int bytesWritten = IonEncoder_1_1.writeClobValue(buf, null);
    final String writtenHex = byteArrayToHex(bytes());
    Assertions.assertEquals("EB 08", writtenHex);
    Assertions.assertEquals(2, bytesWritten);
}

/**
 * Checks the bytes produced by {@code IonEncoder_1_1.writeAnnotations} for symbol-address annotations.
 * Each CSV row is: a list of {@code $}-prefixed symbol ids (turned into a {@code long[]} by
 * {@code SymbolIdsToLongArray}), then the expected output as space-separated hex octets.
 */
@ParameterizedTest
@CsvSource({
" '', ''", // Empty array of annotations
// One annotation: opcode E4 followed by the FlexUInt address.
" $0, E4 01",
" $10, E4 15",
" $256, E4 02 04",
// Two annotations: opcode E5 followed by both FlexUInt addresses.
" $10 $11, E5 15 17",
" $256 $257, E5 02 04 06 04",
// Three or more: opcode E6, the FlexUInt byte length of all addresses, then the addresses.
" $10 $11 $12, E6 07 15 17 19",
"$256 $257 $258, E6 0D 02 04 06 04 0A 04",
})
public void testWriteAnnotations(@ConvertWith(SymbolIdsToLongArray.class) long[] value, String expectedBytes) {
assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeAnnotations);
}

/**
 * A null annotations array must produce no output at all, and the encoder must report 0 bytes written.
 */
@Test
public void testWriteAnnotationsForNull() {
    final int bytesWritten = IonEncoder_1_1.writeAnnotations(buf, null);
    final String writtenHex = byteArrayToHex(bytes());
    Assertions.assertEquals("", writtenHex);
    Assertions.assertEquals(0, bytesWritten);
}

/**
* Utility method to make it easier to write test cases that assert specific sequences of bytes.
*/
Expand All @@ -470,7 +593,7 @@ private static String byteArrayToHex(byte[] bytes) {
* Determines the number of bytes needed to represent a series of hexadecimal digits.
*/
private static int byteLengthFromHexString(String hexString) {
    // Drop everything that is not a decimal digit or an upper-case A-F (separators, and also
    // lower-case hex letters), leaving only the hex digits that make up the bytes.
    String hexDigits = hexString.replaceAll("[^\\dA-F]", "");
    // Two hex digits per byte. An empty string is 0 bytes; the earlier "(length - 1) / 2 + 1"
    // formula reported 1 byte for it.
    return hexDigits.length() / 2;
}

/**
Expand All @@ -496,7 +619,7 @@ private static String byteArrayToBitString(byte[] bytes) {
* Determines the number of bytes needed to represent a series of binary digits.
*/
private static int byteLengthFromBitString(String bitString) {
    // Drop everything that is not a '0' or '1' (e.g. the separators between octets).
    String bits = bitString.replaceAll("[^01]", "");
    // Eight bits per byte. An empty string is 0 bytes; the earlier "(length - 1) / 8 + 1"
    // formula reported 1 byte for it.
    return bits.length() / 8;
}

/**
Expand Down Expand Up @@ -528,4 +651,50 @@ protected Decimal convert(String source) throws ArgumentConversionException {
return Decimal.valueOf(source);
}
}

/**
 * Converts a String of whitespace-separated hexadecimal octets (for example {@code "FE 01"})
 * to a byte array for a @Parameterized test.
 */
static class HexStringToByteArray extends TypedArgumentConverter<String, byte[]> {
    protected HexStringToByteArray() {
        super(String.class, byte[].class);
    }

    /**
     * @param source the hex octets; {@code null} is passed through as {@code null}
     * @return one byte per octet, or an empty array when {@code source} is blank
     * @throws NumberFormatException if an octet is not valid hexadecimal
     */
    @Override
    protected byte[] convert(String source) throws ArgumentConversionException {
        if (source == null) return null;
        // Split the trimmed text on runs of whitespace, so that leading, trailing, or repeated
        // blanks do not yield empty tokens (which Integer.parseInt would reject).
        String trimmed = source.trim();
        if (trimmed.isEmpty()) return new byte[0];
        String[] octets = trimmed.split("\\s+");
        byte[] result = new byte[octets.length];
        for (int i = 0; i < octets.length; i++) {
            // Parse as int first: octets of 80 and above do not fit in a signed byte.
            result[i] = (byte) Integer.parseInt(octets[i], 16);
        }
        return result;
    }
}

/**
 * Converts a String of {@code $}-prefixed symbol ids (for example {@code "$10 $11"}) to a long[]
 * for a @Parameterized test.
 */
static class SymbolIdsToLongArray extends TypedArgumentConverter<String, long[]> {
    protected SymbolIdsToLongArray() {
        super(String.class, long[].class);
    }

    /**
     * @param source the symbol ids; {@code null} is passed through as {@code null}
     * @return the ids in order of appearance; empty when {@code source} contains no ids
     * @throws NumberFormatException if the text after a {@code $} is not a valid long
     */
    @Override
    protected long[] convert(String source) throws ArgumentConversionException {
        if (source == null) return null;
        List<Long> temp = new ArrayList<>();
        for (String sid : source.split("\\$")) {
            // Trim before the emptiness check so that whitespace-only tokens (such as blanks
            // ahead of the first '$') are skipped rather than handed to Long.parseLong.
            String trimmed = sid.trim();
            if (trimmed.isEmpty()) continue;
            temp.add(Long.parseLong(trimmed));
        }
        long[] result = new long[temp.size()];
        for (int i = 0; i < result.length; i++) {
            result[i] = temp.get(i);
        }
        return result;
    }
}
}

0 comments on commit 2300e2a

Please sign in to comment.