Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds support for reading binary Ion 1.1 annotations. #666

Merged
merged 4 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
422 changes: 394 additions & 28 deletions src/main/java/com/amazon/ion/impl/IonCursorBinary.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
Expand Down Expand Up @@ -88,7 +89,7 @@ class IonReaderContinuableApplicationBinary extends IonReaderContinuableCoreBina
private SymbolTable cachedReadOnlySymbolTable = null;

// The reusable annotation iterator.
private final AnnotationSequenceIterator annotationIterator = new AnnotationSequenceIterator();
private final AnnotationMarkerIterator annotationTextIterator = new AnnotationMarkerIterator();

// ------

Expand Down Expand Up @@ -166,38 +167,81 @@ class IonReaderContinuableApplicationBinary extends IonReaderContinuableCoreBina
/**
* Reusable iterator over the annotations on the current value.
*/
private class AnnotationSequenceIterator implements Iterator<String> {
private class AnnotationMarkerIterator implements Iterator<String> {

// All of the annotation SIDs on the current value.
private IntList annotationSids;
// The index into `annotationSids` containing the next annotation to be returned.
private int index = 0;
// TODO perf: try splitting into separate iterators for SIDs and FlexSyms
boolean isSids;
// The byte position of the annotation to return from the next call to next().
long nextAnnotationPeekIndex;

void reset() {
index = 0;
annotationSids = getAnnotationSidList();
}
long target;

@Override
public boolean hasNext() {
return index < annotationSids.size();
return nextAnnotationPeekIndex < target;
}

@Override
public String next() {
int sid = annotationSids.get(index);
String annotation = getSymbol(sid);
if (annotation == null) {
throw new UnknownSymbolException(sid);
if (isSids) {
long savedPeekIndex = peekIndex;
peekIndex = nextAnnotationPeekIndex;
int sid;
if (minorVersion == 0) {
byte b = buffer[(int) peekIndex++];
if (b < 0) {
sid = b & 0x7F;
} else {
sid = readVarUInt_1_0(b);
}
} else {
sid = (int) readFlexInt_1_1();
}
nextAnnotationPeekIndex = peekIndex;
peekIndex = savedPeekIndex;
return convertToString(sid);
}
index++;
return annotation;
Marker marker = annotationTokenMarkers.get((int) nextAnnotationPeekIndex++);
if (marker.startIndex < 0) {
// This means the endIndex represents the token's symbol ID.
return convertToString((int) marker.endIndex);
}
// The token is inline UTF-8 text.
java.nio.ByteBuffer utf8InputBuffer = prepareByteBuffer(marker.startIndex, marker.endIndex);
return utf8Decoder.decode(utf8InputBuffer, (int) (marker.endIndex - marker.startIndex));
}

/**
 * Produces the next annotation as a SymbolToken and advances this iterator past it.
 * When `isSids` is false, the annotation is taken from `annotationTokenMarkers`; otherwise a symbol ID is decoded
 * from the buffer at `nextAnnotationPeekIndex`.
 * @return the next annotation.
 */
SymbolToken nextSymbolToken() {
    if (!isSids) {
        Marker token = annotationTokenMarkers.get((int) nextAnnotationPeekIndex++);
        if (token.startIndex >= 0) {
            // The token is inline UTF-8 text, which has no symbol ID; -1 is passed as the token's SID.
            ByteBuffer encodedText = prepareByteBuffer(token.startIndex, token.endIndex);
            String text = utf8Decoder.decode(encodedText, (int) (token.endIndex - token.startIndex));
            return new SymbolTokenImpl(text, -1);
        }
        // A negative startIndex means the endIndex represents the token's symbol ID.
        return getSymbolToken((int) token.endIndex);
    }
    // Temporarily reposition the cursor onto the annotation, then put it back once the SID has been read.
    long callerPeekIndex = peekIndex;
    peekIndex = nextAnnotationPeekIndex;
    int symbolId;
    if (minorVersion == 0) {
        symbolId = readVarUInt_1_0();
    } else {
        symbolId = (int) readFlexInt_1_1();
    }
    nextAnnotationPeekIndex = peekIndex;
    peekIndex = callerPeekIndex;
    return getSymbolToken(symbolId);
}

/**
 * Not supported: this iterator cannot remove elements.
 * @throws UnsupportedOperationException on every call.
 */
@Override
public void remove() {
throw new UnsupportedOperationException("This iterator does not support element removal.");
}

/**
 * Looks up the text for the given symbol ID via `getSymbol`.
 * @param symbolId the symbol ID to resolve.
 * @return the symbol's text; never null.
 * @throws UnknownSymbolException if `getSymbol` returns null for the given symbol ID.
 */
private String convertToString(int symbolId) {
    final String text = getSymbol(symbolId);
    if (text != null) {
        return text;
    }
    throw new UnknownSymbolException(symbolId);
}
}

/**
Expand Down Expand Up @@ -996,14 +1040,28 @@ public String[] getTypeAnnotations() {
if (!hasAnnotations) {
return _Private_Utils.EMPTY_STRING_ARRAY;
}
IntList annotationSids = getAnnotationSidList();
String[] annotationArray = new String[annotationSids.size()];
for (int i = 0; i < annotationArray.length; i++) {
String symbol = getSymbol(annotationSids.get(i));
if (symbol == null) {
throw new UnknownSymbolException(annotationSids.get(i));
if (annotationSequenceMarker.startIndex >= 0) {
if (annotationSequenceMarker.typeId != null && annotationSequenceMarker.typeId.isInlineable) {
getAnnotationMarkerList();
} else {
IntList annotationSids = getAnnotationSidList();
String[] annotationArray = new String[annotationSids.size()];
for (int i = 0; i < annotationArray.length; i++) {
String symbol = getSymbol(annotationSids.get(i));
if (symbol == null) {
throw new UnknownSymbolException(annotationSids.get(i));
}
annotationArray[i] = symbol;
}
return annotationArray;
}
annotationArray[i] = symbol;
}
String[] annotationArray = new String[annotationTokenMarkers.size()];
annotationTextIterator.nextAnnotationPeekIndex = 0;
annotationTextIterator.target = annotationTokenMarkers.size();
annotationTextIterator.isSids = false;
while (annotationTextIterator.hasNext()) {
annotationArray[(int) annotationTextIterator.nextAnnotationPeekIndex] = annotationTextIterator.next();
}
return annotationArray;
}
Expand All @@ -1013,10 +1071,24 @@ public SymbolToken[] getTypeAnnotationSymbols() {
if (!hasAnnotations) {
return SymbolToken.EMPTY_ARRAY;
}
IntList annotationSids = getAnnotationSidList();
SymbolToken[] annotationArray = new SymbolToken[annotationSids.size()];
for (int i = 0; i < annotationArray.length; i++) {
annotationArray[i] = getSymbolToken(annotationSids.get(i));
if (annotationSequenceMarker.startIndex >= 0) {
if (annotationSequenceMarker.typeId != null && annotationSequenceMarker.typeId.isInlineable) {
getAnnotationMarkerList();
} else {
IntList annotationSids = getAnnotationSidList();
SymbolToken[] annotationArray = new SymbolToken[annotationSids.size()];
for (int i = 0; i < annotationArray.length; i++) {
annotationArray[i] = getSymbolToken(annotationSids.get(i));
}
return annotationArray;
}
}
SymbolToken[] annotationArray = new SymbolToken[annotationTokenMarkers.size()];
annotationTextIterator.nextAnnotationPeekIndex = 0;
annotationTextIterator.target = annotationTokenMarkers.size();
annotationTextIterator.isSids = false;
while (annotationTextIterator.hasNext()) {
annotationArray[(int) annotationTextIterator.nextAnnotationPeekIndex] = annotationTextIterator.nextSymbolToken();
}
return annotationArray;
}
Expand Down Expand Up @@ -1044,12 +1116,28 @@ public Iterator<String> iterateTypeAnnotations() {
if (!hasAnnotations) {
return EMPTY_ITERATOR;
}
annotationIterator.reset();
return annotationIterator;
if (annotationSequenceMarker.startIndex >= 0) {
if (annotationSequenceMarker.typeId != null && annotationSequenceMarker.typeId.isInlineable) {
// Note: this could be made more efficient by parsing from the marker sequence iteratively.
getAnnotationMarkerList();
} else {
annotationTextIterator.nextAnnotationPeekIndex = annotationSequenceMarker.startIndex;
annotationTextIterator.target = annotationSequenceMarker.endIndex;
annotationTextIterator.isSids = true;
return annotationTextIterator;
}
}
annotationTextIterator.nextAnnotationPeekIndex = 0;
annotationTextIterator.target = annotationTokenMarkers.size();
annotationTextIterator.isSids = false;
return annotationTextIterator;
}

@Override
public String getFieldName() {
if (fieldTextMarker.startIndex > -1) {
return getFieldText();
}
if (fieldSid < 0) {
return null;
}
Expand All @@ -1062,6 +1150,9 @@ public String getFieldName() {

@Override
public SymbolToken getFieldNameSymbol() {
if (fieldTextMarker.startIndex > -1) {
return new SymbolTokenImpl(getFieldText(), -1);
}
if (fieldSid < 0) {
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import com.amazon.ion.Timestamp;
import com.amazon.ion._private.SuppressFBWarnings;
import com.amazon.ion.impl.bin.IntList;
import com.amazon.ion.impl.bin.OpCodes;
import com.amazon.ion.impl.bin.utf8.Utf8StringDecoder;
import com.amazon.ion.impl.bin.utf8.Utf8StringDecoderPool;

Expand Down Expand Up @@ -538,8 +539,42 @@ long readFlexInt_1_1() {
return readLargeFlexInt_1_1(currentByte);
}

private int readVarSym_1_1(Marker marker) {
throw new UnsupportedOperationException();
/**
 * Decodes the FlexSym that begins at `peekIndex`, leaving `peekIndex` on the first byte that follows it.
 * The outcome depends on what the FlexSym holds:
 * <ul>
 *     <li>A symbol ID (including symbol zero): the Marker's endIndex is set to the symbol ID, its startIndex is
 *     not set, and the symbol ID is returned.</li>
 *     <li>Inline text (including zero-length text): the Marker's startIndex and endIndex are set to the bounds of
 *     the UTF-8 byte sequence, and -1 is returned.</li>
 *     <li>A delimited end marker: neither the Marker's startIndex nor its endIndex is set, and -1 is
 *     returned.</li>
 * </ul>
 * @param markerToSet the marker to populate.
 * @return the symbol ID value if one was present, otherwise -1.
 * @throws IonException if a FlexSym of 0 is followed by anything other than symbol zero, empty string, or
 *     delimited end.
 */
private long readFlexSym_1_1(Marker markerToSet) {
    // TODO find a factoring that reduces duplication with IonCursorBinary, taking into account performance.
    final long flexInt = readFlexInt_1_1();
    if (flexInt > 0) {
        // Positive: the value is itself the symbol ID.
        markerToSet.endIndex = flexInt;
        return flexInt;
    }
    if (flexInt < 0) {
        // Negative: the magnitude is the byte length of the inline text that starts here.
        final long textStart = peekIndex;
        final long textEnd = textStart - flexInt;
        markerToSet.startIndex = textStart;
        markerToSet.endIndex = textEnd;
        peekIndex = textEnd;
        return -1;
    }
    // Zero: the byte that follows selects one of the special cases.
    final int escapedByte = buffer[(int) (peekIndex++)];
    if (escapedByte == OpCodes.INLINE_SYMBOL_ZERO_LENGTH) {
        // Symbol zero.
        markerToSet.endIndex = 0;
        return 0;
    }
    if (escapedByte == OpCodes.STRING_ZERO_LENGTH) {
        // Inline symbol with zero length.
        markerToSet.startIndex = peekIndex;
        markerToSet.endIndex = peekIndex;
        return -1;
    }
    if (escapedByte == OpCodes.DELIMITED_END_MARKER) {
        return -1;
    }
    throw new IonException("FlexSym 0 may only precede symbol zero, empty string, or delimited end.");
}

/**
Expand Down Expand Up @@ -1259,11 +1294,42 @@ public int[] getAnnotationIds() {
return annotationArray;
}

/**
 * Parses the FlexSyms between `annotationSequenceMarker.startIndex` and `annotationSequenceMarker.endIndex` into
 * `annotationTokenMarkers`. The list is cleared and repopulated on every call, and `peekIndex` is restored to its
 * prior value before returning.
 * @return `annotationTokenMarkers`, holding one marker per annotation that was read.
 */
MarkerList getAnnotationMarkerList() {
    annotationTokenMarkers.clear();
    final long callerPeekIndex = peekIndex;
    peekIndex = annotationSequenceMarker.startIndex;
    while (peekIndex < annotationSequenceMarker.endIndex) {
        Marker candidate = annotationTokenMarkers.provisionalElement();
        int symbolId = (int) readFlexSym_1_1(candidate);
        if (symbolId < 0) {
            // No symbol ID was read. If the endIndex is also unset, no inline text was read either, so stop
            // without committing this marker.
            // NOTE(review): presumably provisionalElement() hands back a marker whose endIndex is negative -- confirm.
            if (candidate.endIndex < 0) {
                break;
            }
        } else {
            candidate.endIndex = symbolId;
        }
        annotationTokenMarkers.commit();
    }
    peekIndex = callerPeekIndex;
    return annotationTokenMarkers;
}

/**
 * Returns the current value of `fieldSid`.
 * NOTE(review): presumably a negative value means no field symbol ID is available -- confirm against the code
 * that assigns `fieldSid`.
 * @return the current `fieldSid`.
 */
@Override
public int getFieldId() {
return fieldSid;
}

/**
 * Decodes the UTF-8 bytes delimited by `fieldTextMarker` into the current field name's text.
 * @return the field name text.
 */
String getFieldText() {
    final ByteBuffer encodedText = prepareByteBuffer(fieldTextMarker.startIndex, fieldTextMarker.endIndex);
    final long encodedLength = fieldTextMarker.endIndex - fieldTextMarker.startIndex;
    return utf8Decoder.decode(encodedText, (int) encodedLength);
}

@Override
public boolean isInStruct() {
return parent != null && parent.typeId.type == IonType.STRUCT;
Expand Down
16 changes: 14 additions & 2 deletions src/main/java/com/amazon/ion/impl/IonTypeID.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ final class IonTypeID {
private static final int ANNOTATION_WRAPPER_MAX_LENGTH = 0xE;
static final int ORDERED_STRUCT_NIBBLE = 0x1;

// Ion 1.1 annotation wrapper lower nibbles (upper nibble 0xE)
static final int ONE_ANNOTATION_SID_LOWER_NIBBLE_1_1 = 0x4;
static final int TWO_ANNOTATION_SIDS_LOWER_NIBBLE_1_1 = 0x5;
static final int ONE_ANNOTATION_FLEX_SYM_LOWER_NIBBLE_1_1 = 0x7;
static final int TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1 = 0x8;

// NOTE: 'annotation wrapper' is not an IonType, but it is simplest to treat it as one for the purposes of this
// implementation in order to have a direct mapping from binary type IDs to IonType enum values. IonType.DATAGRAM
// does not have a type ID, so we will use it to mean 'annotation wrapper' instead.
Expand Down Expand Up @@ -65,7 +71,7 @@ final class IonTypeID {
IonType.LIST,
IonType.SEXP,
IonType.STRUCT, // symbol ID field names
IonType.STRUCT, // FlexSym field names
null, //IonType.STRUCT, // FlexSym field names // TODO see: https://github.com/amazon-ion/ion-docs/issues/292
null, // E: symbol ID, annotated value, NOP, null, system macro invocation
null // F: variable length macro, variable length of all types, delimited start/end
};
Expand All @@ -78,6 +84,7 @@ final class IonTypeID {
static final IonTypeID[] TYPE_IDS_1_0;
static final IonTypeID[] TYPE_IDS_1_1;
static final IonTypeID[] NULL_TYPE_IDS_1_1;
static final IonTypeID STRUCT_WITH_FLEX_SYMS_ID;
static {
TYPE_IDS_NO_IVM = new IonTypeID[NUMBER_OF_BYTES];
TYPE_IDS_1_0 = new IonTypeID[NUMBER_OF_BYTES];
Expand Down Expand Up @@ -105,14 +112,18 @@ final class IonTypeID {
NULL_TYPE_IDS_1_1[0x9] = TYPE_IDS_1_0[0xBF]; // null.list
NULL_TYPE_IDS_1_1[0xA] = TYPE_IDS_1_0[0xCF]; // null.sexp
NULL_TYPE_IDS_1_1[0xB] = TYPE_IDS_1_0[0xDF]; // null.struct

// This is used as a dummy ID when a struct switches to using FlexSym field names in the middle. The key
// here is that the type is STRUCT and the isInlineable flag is true.
STRUCT_WITH_FLEX_SYMS_ID = TYPE_IDS_1_1[VARIABLE_LENGTH_STRUCT_WITH_FLEX_SYMS & 0xFF];
}

final IonType type;
final int length;
final boolean variableLength;
final boolean isNull;
final boolean isNopPad;
final byte lowerNibble;
final byte lowerNibble; // TODO consider storing the entire byte rather than just the lower nibble
final boolean isValid;
final boolean isNegativeInt;
final boolean isMacroInvocation;
Expand Down Expand Up @@ -163,6 +174,7 @@ private static boolean isValid_1_1(byte id) {
|| id == (byte) 0xD1
|| id == (byte) 0xE0
|| id == (byte) 0xEE
|| (id & 0xF0) == 0xD0 // TODO see: https://github.com/amazon-ion/ion-docs/issues/292
);
}

Expand Down
Loading
Loading