Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Support whole range of byte values for booleans #6623

Merged
merged 8 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@
public class BooleanUtils {

/**
* The byte encoding of null booleans.
* The byte encoding of null booleans. Do not use to compare values, use {@link #isNull(byte)} instead.
*/
public static final byte NULL_BOOLEAN_AS_BYTE = QueryConstants.NULL_BYTE;

/**
* The byte encoding of true booleans.
* The byte encoding of true booleans. Do not use to compare values, use {@link #byteAsBoolean(byte)} instead.
*/
public static final byte TRUE_BOOLEAN_AS_BYTE = (byte) 1;

/**
* The byte encoding of false booleans.
* The byte encoding of false booleans. This is the only safe value to use when comparing byte representations.
*/
public static final byte FALSE_BOOLEAN_AS_BYTE = (byte) 0;

Expand All @@ -36,7 +36,17 @@ public class BooleanUtils {
* @return the boxed boolean represented by byteValue
*/
public static Boolean byteAsBoolean(final byte byteValue) {
return byteValue == FALSE_BOOLEAN_AS_BYTE ? Boolean.FALSE : byteValue > 0 ? Boolean.TRUE : null;
return isNull(byteValue) ? null : byteValue > FALSE_BOOLEAN_AS_BYTE;
}

/**
* Check if a byte represents a null boolean.
*
* @param byteValue the byte to check if it represents a null boolean
* @return true if byteValue represents a null boolean
*/
public static boolean isNull(final byte byteValue) {
return byteValue < 0;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -844,7 +844,7 @@ void nullByRanges(@NotNull final RowSequence rowSequence) {
boolean prevRequired = false;
for (int jj = 0; jj < length; ++jj) {
final int indexWithinBlock = sIndexWithinBlock + jj;
if (block[indexWithinBlock] != NULL_BOOLEAN_AS_BYTE) {
if (!BooleanUtils.isNull(block[indexWithinBlock])) {
prevRequired = true;
break;
}
Expand Down Expand Up @@ -916,7 +916,7 @@ void nullByKeys(@NotNull final RowSequence rowSequence) {
if (hasPrev) {

final byte oldValue = block[indexWithinBlock];
if (oldValue != NULL_BOOLEAN_AS_BYTE) {
if (!BooleanUtils.isNull(oldValue)) {
if (prevBlock.getValue() == null) {
prevBlock.setValue(ensurePrevBlock(firstKey, block0, block1, block2));
inUse.setValue(prevInUse.get(block0).get(block1).get(block2));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import io.deephaven.engine.table.impl.sources.BooleanArraySource;
import io.deephaven.engine.table.impl.sources.BooleanSparseArraySource;
import io.deephaven.engine.table.WritableColumnSource;
import io.deephaven.util.BooleanUtils;

import io.deephaven.base.verify.Assert;
import io.deephaven.chunk.ByteChunk;
Expand All @@ -23,7 +24,6 @@
import io.deephaven.engine.table.impl.updateby.internal.BaseByteUpdateByOperator;
import org.jetbrains.annotations.NotNull;

import static io.deephaven.util.BooleanUtils.NULL_BOOLEAN_AS_BYTE;

public class BooleanFillByOperator extends BaseByteUpdateByOperator {
// region extra-fields
Expand All @@ -46,7 +46,7 @@ public void push(int pos, int count) {
Assert.eq(count, "push count", 1);

byte val = booleanValueChunk.get(pos);
if(val != NULL_BOOLEAN_AS_BYTE) {
if(!BooleanUtils.isNull(val)) {
curVal = val;
}
}
Expand Down Expand Up @@ -80,7 +80,7 @@ public UpdateByOperator.Context makeUpdateContext(final int affectedChunkSize, f
// region extra-methods
@Override
protected byte getNullValue() {
return NULL_BOOLEAN_AS_BYTE;
return BooleanUtils.NULL_BOOLEAN_AS_BYTE;
}
@Override
protected WritableColumnSource<Byte> makeSparseSource() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import java.io.IOException;
import java.io.OutputStream;

import static io.deephaven.util.QueryConstants.*;

public class BooleanChunkInputStreamGenerator extends BaseChunkInputStreamGenerator<ByteChunk<Values>> {
private static final String DEBUG_NAME = "BooleanChunkInputStreamGenerator";

Expand Down Expand Up @@ -60,7 +58,7 @@ public int nullCount() {
if (cachedNullCount == -1) {
cachedNullCount = 0;
subset.forAllRowKeys(row -> {
if (chunk.get((int) row) == NULL_BYTE) {
if (BooleanUtils.isNull(chunk.get((int) row))) {
++cachedNullCount;
}
});
Expand Down Expand Up @@ -117,7 +115,7 @@ public int drainTo(final OutputStream outputStream) throws IOException {

if (sendValidityBuffer()) {
subset.forAllRowKeys(row -> {
if (chunk.get((int) row) != NULL_BYTE) {
if (!BooleanUtils.isNull(chunk.get((int) row))) {
context.accumulator |= 1L << context.count;
}
if (++context.count == 64) {
Expand All @@ -132,9 +130,9 @@ public int drainTo(final OutputStream outputStream) throws IOException {

// write the included values
subset.forAllRowKeys(row -> {
final byte byteValue = chunk.get((int) row);
if (byteValue != NULL_BYTE) {
context.accumulator |= (byteValue > 0 ? 1L : 0L) << context.count;
final Boolean value = BooleanUtils.byteAsBoolean(chunk.get((int) row));
cpwright marked this conversation as resolved.
Show resolved Hide resolved
if (value != null) {
context.accumulator |= (value ? 1L : 0L) << context.count;
}
if (++context.count == 64) {
flush.run();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,35 @@ public void testBooleanChunkSerialization() throws IOException {
}
}

public void testBooleanChunkSerializationNonStandardNulls() throws IOException {
for (final BarrageSubscriptionOptions opts : options) {
testRoundTripSerialization(opts, boolean.class, (utO) -> {
final WritableByteChunk<Values> chunk = utO.asWritableByteChunk();
for (int i = 0; i < chunk.size(); ++i) {
chunk.set(i, (byte) i);
}
}, (utO, utC, subset, offset) -> {
final WritableByteChunk<Values> original = utO.asWritableByteChunk();
final WritableByteChunk<Values> computed = utC.asWritableByteChunk();
if (subset == null) {
for (int i = 0; i < original.size(); ++i) {
Boolean origBoolean = BooleanUtils.byteAsBoolean(original.get(i));
Boolean computedBoolean = BooleanUtils.byteAsBoolean(computed.get(offset + i));
Assert.nullSafeEquals(origBoolean, "origBoolean", computedBoolean, "computedBoolean");
}
} else {
final MutableInt off = new MutableInt();
subset.forAllRowKeys(key -> {
Boolean origBoolean = BooleanUtils.byteAsBoolean(original.get((int) key));
Boolean computedBoolean =
BooleanUtils.byteAsBoolean(computed.get(offset + off.getAndIncrement()));
Assert.nullSafeEquals(origBoolean, "origBoolean", computedBoolean, "computedBoolean");
});
}
});
}
}

public void testByteChunkSerialization() throws IOException {
final Random random = new Random(0);
for (final BarrageSubscriptionOptions opts : options) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ private byte parseFromString(JsonParser parser) throws IOException {
checkStringAllowed(parser);
if (!allowNull()) {
final byte res = Parsing.parseStringAsByteBool(parser, BooleanUtils.NULL_BOOLEAN_AS_BYTE);
if (res == BooleanUtils.NULL_BOOLEAN_AS_BYTE) {
if (BooleanUtils.isNull(res)) {
throw nullNotAllowed(parser);
}
return res;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//
package io.deephaven.parquet.base;

import io.deephaven.util.QueryConstants;
import io.deephaven.util.BooleanUtils;
import org.apache.parquet.bytes.BytesInput;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.statistics.Statistics;
Expand Down Expand Up @@ -90,11 +90,10 @@ public WriteResult writeBulkFilterNulls(@NotNull ByteBuffer bulkValues,
final int rowCount,
@NotNull final Statistics<?> statistics) throws IOException {
while (bulkValues.hasRemaining()) {
final byte next = bulkValues.get();
if (next != QueryConstants.NULL_BYTE) {
final boolean v = next == 1;
writeBoolean(v);
statistics.updateStats(v);
final Boolean next = BooleanUtils.byteAsBoolean(bulkValues.get());
if (next != null) {
writeBoolean(next);
statistics.updateStats(next);
dlEncoder.writeInt(DL_ITEM_PRESENT);
} else {
statistics.incrementNumNulls();
Expand All @@ -111,11 +110,10 @@ public WriteResult writeBulkFilterNulls(@NotNull ByteBuffer bulkValues,
nullOffsets.clear();
int i = 0;
while (bulkValues.hasRemaining()) {
final byte next = bulkValues.get();
if (next != QueryConstants.NULL_BYTE) {
final boolean v = next == 1;
writeBoolean(v);
statistics.updateStats(v);
final Boolean next = BooleanUtils.byteAsBoolean(bulkValues.get());
if (next != null) {
writeBoolean(next);
statistics.updateStats(next);
} else {
nullOffsets = Helpers.ensureCapacity(nullOffsets);
nullOffsets.put(i);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,19 @@
import io.deephaven.UncheckedDeephavenException;
import io.deephaven.base.FileUtils;
import io.deephaven.engine.context.ExecutionContext;
import io.deephaven.engine.rowset.RowSetFactory;
import io.deephaven.engine.rowset.TrackingRowSet;
import io.deephaven.engine.table.ColumnDefinition;
import io.deephaven.engine.table.ColumnSource;
import io.deephaven.engine.table.Table;
import io.deephaven.engine.table.TableDefinition;
import io.deephaven.engine.table.impl.InMemoryTable;
import io.deephaven.engine.table.impl.QueryTable;
import io.deephaven.engine.table.impl.UncoalescedTable;
import io.deephaven.engine.table.impl.indexer.DataIndexer;
import io.deephaven.engine.table.impl.locations.TableDataException;
import io.deephaven.engine.table.impl.sources.ByteArraySource;
import io.deephaven.engine.table.impl.sources.ReinterpretUtils;
import io.deephaven.engine.table.impl.util.ColumnHolder;
import io.deephaven.engine.table.vectors.ColumnVectors;
import io.deephaven.engine.testutil.junit4.EngineCleanup;
Expand Down Expand Up @@ -276,6 +282,24 @@ public void testWriteTableMissingColumns() {
result.close();
}

@Test
public void testWriteBooleanValues() {
TrackingRowSet rowSet = RowSetFactory.fromRange(0, 499).toTracking();
ByteArraySource source = new ByteArraySource();
source.ensureCapacity(rowSet.size(), false);
rowSet.forAllRowKeys(i -> {
source.set(i, (byte) i);
});
ColumnSource<Boolean> column = ReinterpretUtils.byteToBooleanSource(source);
Map<String, ? extends ColumnSource<?>> columns = Map.of("Bool", column);
QueryTable table = new QueryTable(rowSet, columns);
final File dest = new File(testRoot + File.separator + "boolean.parquet");
ParquetTools.writeTable(table, dest.getPath());
final Table result = ParquetTools.readTable(dest.getPath());
assertTableEquals(table, result);
result.close();
}

@Test
public void testWriteTableExceptions() throws IOException {
new File(testRoot + File.separator + "unexpectedFile").createNewFile();
Expand Down
2 changes: 1 addition & 1 deletion qst/src/main/java/io/deephaven/qst/array/BooleanArray.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public final Boolean value(int index) {

@Override
public boolean isNull(int index) {
return values[index] == BooleanUtils.NULL_BOOLEAN_AS_BYTE;
return BooleanUtils.isNull(values[index]);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1157,8 +1157,12 @@ private static void replicateBooleanSparseArraySource() throws IOException {
"ObjectChunk<[?] super Values>", "ObjectChunk<Boolean, ? super Values>");
lines = simpleFixup(lines, "primitive get", "NULL_BOOLEAN", "NULL_BOOLEAN_AS_BYTE", "getBoolean", "getByte",
"getPrevBoolean", "getPrevByte");
lines = simpleFixup(lines, "nullByKeys", "NULL_BOOLEAN", "NULL_BOOLEAN_AS_BYTE");
lines = simpleFixup(lines, "nullByRanges", "NULL_BOOLEAN", "NULL_BOOLEAN_AS_BYTE");
lines = simpleFixup(lines, "nullByKeys",
"oldValue != NULL_BOOLEAN", "!BooleanUtils.isNull(oldValue)",
"NULL_BOOLEAN", "NULL_BOOLEAN_AS_BYTE");
lines = simpleFixup(lines, "nullByRanges",
"block\\[indexWithinBlock\\] != NULL_BOOLEAN", "!BooleanUtils.isNull(block[indexWithinBlock])",
"NULL_BOOLEAN", "NULL_BOOLEAN_AS_BYTE");
lines = simpleFixup(lines, "setNull", "NULL_BOOLEAN", "NULL_BOOLEAN_AS_BYTE");

lines = replaceRegion(lines, "copyFromTypedArray", Arrays.asList(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,12 +326,14 @@ private static void fixupByteBase(String byteResult) throws IOException {
private static void fixupBoolean(String boolResult) throws IOException {
final File objectFile = new File(boolResult);
List<String> lines = FileUtils.readLines(objectFile, Charset.defaultCharset());
lines = removeImport(lines, "import static io.deephaven.util.QueryConstants.NULL_BOOLEAN;");
lines = addImport(lines, "import io.deephaven.engine.table.ColumnSource;",
"import java.util.Map;",
"import java.util.Collections;",
"import io.deephaven.engine.table.impl.sources.BooleanArraySource;",
"import io.deephaven.engine.table.impl.sources.BooleanSparseArraySource;",
"import io.deephaven.engine.table.WritableColumnSource;");
"import io.deephaven.engine.table.WritableColumnSource;",
"import io.deephaven.util.BooleanUtils;");

lines = globalReplacements(lines,
"BaseBooleanUpdateByOperator", "BaseByteUpdateByOperator",
Expand All @@ -343,14 +345,12 @@ private static void fixupBoolean(String boolResult) throws IOException {
"boolean previousVal", "byte previousVal",
"boolean currentVal", "byte currentVal",
"BooleanChunk", "ByteChunk",
"NULL_BOOLEAN", "NULL_BOOLEAN_AS_BYTE");
lines = globalReplacements(lines,
"!BooleanPrimitives\\.isNull\\(currentVal\\)", "currentVal != NULL_BOOLEAN_AS_BYTE");
"val != NULL_BOOLEAN", "!BooleanUtils.isNull(val)");
lines = replaceRegion(lines, "extra-methods",
Collections.singletonList(
" @Override\n" +
" protected byte getNullValue() {\n" +
" return NULL_BOOLEAN_AS_BYTE;\n" +
" return BooleanUtils.NULL_BOOLEAN_AS_BYTE;\n" +
" }\n" +
" @Override\n" +
" protected WritableColumnSource<Byte> makeSparseSource() {\n" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import elemental2.core.TypedArray;
import elemental2.core.Uint16Array;
import elemental2.core.Uint8Array;
import io.deephaven.util.BooleanUtils;
import io.deephaven.web.client.api.LongWrapper;
import io.deephaven.web.client.api.i18n.JsDateTimeFormat;
import io.deephaven.web.client.api.i18n.JsTimeZone;
Expand Down Expand Up @@ -416,15 +417,11 @@ public void write(Object[] data, ParseContext context, JsConsumer<Node> addNode,
}
}

if (boolValue != FALSE_BOOLEAN_AS_BYTE && boolValue != TRUE_BOOLEAN_AS_BYTE
&& boolValue != NULL_BOOLEAN_AS_BYTE) {
throw new IllegalArgumentException("Can't handle " + val + " as a boolean value");
}

// write the value, and mark non-null if necessary
if (boolValue != NULL_BOOLEAN_AS_BYTE) {
Boolean b = BooleanUtils.byteAsBoolean(boolValue);
if (b != null) {
validity.set(i);
if (boolValue == TRUE_BOOLEAN_AS_BYTE) {
if (b) {
payload.set(i);
}
} else {
Expand Down