Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix!: statistical functions should return null when provided a vector of only null values #5606

Merged
merged 19 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// @formatter:off
package io.deephaven.engine.table.impl.by;

import io.deephaven.base.verify.Assert;
import io.deephaven.chunk.attributes.ChunkLengths;
import io.deephaven.chunk.attributes.ChunkPositions;
import io.deephaven.chunk.attributes.Values;
Expand Down Expand Up @@ -87,24 +88,25 @@ private boolean addChunk(ByteChunk<? extends Values> values, long destination, i
final double sum = SumByteChunk.sum2ByteChunk(values, chunkStart, chunkSize, chunkNonNull, sum2);

if (chunkNonNull.get() > 0) {
final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, chunkNonNull.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, chunkNonNull.get());
final double newSum = plusDouble(sumSource.getUnsafe(destination), sum);
final double newSum2 = plusDouble(sum2Source.getUnsafe(destination), sum2.doubleValue());

sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

if (nonNullCount <= 1) {
Assert.neqZero(totalNormalCount, "totalNormalCount");
if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
} else {
final double variance = (newSum2 - (newSum * newSum / nonNullCount)) / (nonNullCount - 1);
final double variance = (newSum2 - (newSum * newSum / totalNormalCount)) / (totalNormalCount - 1);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);
}
} else {
final long nonNullCount = nonNullCounter.getCountUnsafe(destination);
if (nonNullCount == 0) {
final long totalNormalCount = nonNullCounter.getCountUnsafe(destination);
if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
} else if (nonNullCount == 1) {
} else if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
}
}
Expand All @@ -120,12 +122,12 @@ private boolean removeChunk(ByteChunk<? extends Values> values, long destination
return false;
}

final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, -chunkNonNull.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, -chunkNonNull.get());

final double newSum;
final double newSum2;

if (nonNullCount == 0) {
if (totalNormalCount == 0) {
newSum = newSum2 = 0;
} else {
newSum = plusDouble(sumSource.getUnsafe(destination), -sum);
Expand All @@ -135,15 +137,15 @@ private boolean removeChunk(ByteChunk<? extends Values> values, long destination
sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

if (nonNullCount == 0) {
if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
return true;
} else if (nonNullCount == 1) {
} else if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
return true;
}

final double variance = (newSum2 - (newSum * newSum / nonNullCount)) / (nonNullCount - 1);
final double variance = (newSum2 - (newSum * newSum / totalNormalCount)) / (totalNormalCount - 1);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);

return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
//
package io.deephaven.engine.table.impl.by;

import io.deephaven.base.verify.Assert;
import io.deephaven.chunk.attributes.ChunkLengths;
import io.deephaven.chunk.attributes.ChunkPositions;
import io.deephaven.chunk.attributes.Values;
Expand Down Expand Up @@ -83,24 +84,25 @@ private boolean addChunk(CharChunk<? extends Values> values, long destination, i
final double sum = SumCharChunk.sum2CharChunk(values, chunkStart, chunkSize, chunkNonNull, sum2);

if (chunkNonNull.get() > 0) {
final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, chunkNonNull.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, chunkNonNull.get());
final double newSum = plusDouble(sumSource.getUnsafe(destination), sum);
final double newSum2 = plusDouble(sum2Source.getUnsafe(destination), sum2.doubleValue());

sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

if (nonNullCount <= 1) {
Assert.neqZero(totalNormalCount, "totalNormalCount");
if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
} else {
final double variance = (newSum2 - (newSum * newSum / nonNullCount)) / (nonNullCount - 1);
final double variance = (newSum2 - (newSum * newSum / totalNormalCount)) / (totalNormalCount - 1);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);
}
} else {
final long nonNullCount = nonNullCounter.getCountUnsafe(destination);
if (nonNullCount == 0) {
final long totalNormalCount = nonNullCounter.getCountUnsafe(destination);
if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
} else if (nonNullCount == 1) {
} else if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
}
}
Expand All @@ -116,12 +118,12 @@ private boolean removeChunk(CharChunk<? extends Values> values, long destination
return false;
}

final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, -chunkNonNull.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, -chunkNonNull.get());

final double newSum;
final double newSum2;

if (nonNullCount == 0) {
if (totalNormalCount == 0) {
newSum = newSum2 = 0;
} else {
newSum = plusDouble(sumSource.getUnsafe(destination), -sum);
Expand All @@ -131,15 +133,15 @@ private boolean removeChunk(CharChunk<? extends Values> values, long destination
sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

if (nonNullCount == 0) {
if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
return true;
} else if (nonNullCount == 1) {
} else if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
return true;
}

final double variance = (newSum2 - (newSum * newSum / nonNullCount)) / (nonNullCount - 1);
final double variance = (newSum2 - (newSum * newSum / totalNormalCount)) / (totalNormalCount - 1);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);

return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,23 +97,23 @@ private boolean addChunk(DoubleChunk<? extends Values> values, long destination,
final boolean forceNanResult = totalNegativeInfinities > 0 || totalPositiveInfinities > 0 || totalNanCount > 0;

if (chunkNormalCount.get() > 0) {
final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, chunkNormalCount.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, chunkNormalCount.get());
final double newSum = NullSafeAddition.plusDouble(sumSource.getUnsafe(destination), sum);
final double newSum2 = NullSafeAddition.plusDouble(sum2Source.getUnsafe(destination), sum2.doubleValue());

sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

Assert.neqZero(nonNullCount, "nonNullCount");
if (forceNanResult || nonNullCount == 1) {
Assert.neqZero(totalNormalCount, "totalNormalCount");
if (forceNanResult || totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
} else {
// If the sum or sumSquared has reached +/-Infinity, we are stuck with NaN forever.
if (Double.isInfinite(newSum) || Double.isInfinite(newSum2)) {
resultColumn.set(destination, Double.NaN);
return true;
}
final double variance = computeVariance(nonNullCount, newSum, newSum2);
final double variance = computeVariance(totalNormalCount, newSum, newSum2);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);
}
return true;
Expand Down Expand Up @@ -176,15 +176,17 @@ private boolean removeChunk(DoubleChunk<? extends Values> values, long destinati
}
sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);
} else if (totalNormalCount == 1 || forceNanResult) {
} else {
newSum = sumSource.getUnsafe(destination);
newSum2 = sum2Source.getUnsafe(destination);
}

if (totalNormalCount == 1 || forceNanResult) {
resultColumn.set(destination, Double.NaN);
return true;
} else if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
return true;
} else {
newSum = sumSource.getUnsafe(destination);
newSum2 = sum2Source.getUnsafe(destination);
}

// If the sum has reached +/-Infinity, we are stuck with NaN forever.
Expand All @@ -196,6 +198,7 @@ private boolean removeChunk(DoubleChunk<? extends Values> values, long destinati
// Perform the calculation in a way that minimizes the impact of FP error.
final double variance = computeVariance(totalNormalCount, newSum, newSum2);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);

return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,23 +93,23 @@ private boolean addChunk(FloatChunk<? extends Values> values, long destination,
final boolean forceNanResult = totalNegativeInfinities > 0 || totalPositiveInfinities > 0 || totalNanCount > 0;

if (chunkNormalCount.get() > 0) {
final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, chunkNormalCount.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, chunkNormalCount.get());
final double newSum = NullSafeAddition.plusDouble(sumSource.getUnsafe(destination), sum);
final double newSum2 = NullSafeAddition.plusDouble(sum2Source.getUnsafe(destination), sum2.doubleValue());

sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

Assert.neqZero(nonNullCount, "nonNullCount");
if (forceNanResult || nonNullCount == 1) {
Assert.neqZero(totalNormalCount, "totalNormalCount");
if (forceNanResult || totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
} else {
// If the sum or sumSquared has reached +/-Infinity, we are stuck with NaN forever.
if (Double.isInfinite(newSum) || Double.isInfinite(newSum2)) {
resultColumn.set(destination, Double.NaN);
return true;
}
final double variance = computeVariance(nonNullCount, newSum, newSum2);
final double variance = computeVariance(totalNormalCount, newSum, newSum2);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);
}
return true;
Expand Down Expand Up @@ -172,15 +172,17 @@ private boolean removeChunk(FloatChunk<? extends Values> values, long destinatio
}
sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);
} else if (totalNormalCount == 1 || forceNanResult) {
} else {
newSum = sumSource.getUnsafe(destination);
newSum2 = sum2Source.getUnsafe(destination);
}

if (totalNormalCount == 1 || forceNanResult) {
resultColumn.set(destination, Double.NaN);
return true;
} else if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
return true;
} else {
newSum = sumSource.getUnsafe(destination);
newSum2 = sum2Source.getUnsafe(destination);
}

// If the sum has reached +/-Infinity, we are stuck with NaN forever.
Expand All @@ -192,6 +194,7 @@ private boolean removeChunk(FloatChunk<? extends Values> values, long destinatio
// Perform the calculation in a way that minimizes the impact of FP error.
final double variance = computeVariance(totalNormalCount, newSum, newSum2);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);

return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// @formatter:off
package io.deephaven.engine.table.impl.by;

import io.deephaven.base.verify.Assert;
import io.deephaven.chunk.attributes.ChunkLengths;
import io.deephaven.chunk.attributes.ChunkPositions;
import io.deephaven.chunk.attributes.Values;
Expand Down Expand Up @@ -87,24 +88,25 @@ private boolean addChunk(IntChunk<? extends Values> values, long destination, in
final double sum = SumIntChunk.sum2IntChunk(values, chunkStart, chunkSize, chunkNonNull, sum2);

if (chunkNonNull.get() > 0) {
final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, chunkNonNull.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, chunkNonNull.get());
final double newSum = plusDouble(sumSource.getUnsafe(destination), sum);
final double newSum2 = plusDouble(sum2Source.getUnsafe(destination), sum2.doubleValue());

sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

if (nonNullCount <= 1) {
Assert.neqZero(totalNormalCount, "totalNormalCount");
if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
} else {
final double variance = (newSum2 - (newSum * newSum / nonNullCount)) / (nonNullCount - 1);
final double variance = (newSum2 - (newSum * newSum / totalNormalCount)) / (totalNormalCount - 1);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);
}
} else {
final long nonNullCount = nonNullCounter.getCountUnsafe(destination);
if (nonNullCount == 0) {
final long totalNormalCount = nonNullCounter.getCountUnsafe(destination);
if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
} else if (nonNullCount == 1) {
} else if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
}
}
Expand All @@ -120,12 +122,12 @@ private boolean removeChunk(IntChunk<? extends Values> values, long destination,
return false;
}

final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, -chunkNonNull.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, -chunkNonNull.get());

final double newSum;
final double newSum2;

if (nonNullCount == 0) {
if (totalNormalCount == 0) {
newSum = newSum2 = 0;
} else {
newSum = plusDouble(sumSource.getUnsafe(destination), -sum);
Expand All @@ -135,15 +137,15 @@ private boolean removeChunk(IntChunk<? extends Values> values, long destination,
sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

if (nonNullCount == 0) {
if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
return true;
} else if (nonNullCount == 1) {
} else if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
return true;
}

final double variance = (newSum2 - (newSum * newSum / nonNullCount)) / (nonNullCount - 1);
final double variance = (newSum2 - (newSum * newSum / totalNormalCount)) / (totalNormalCount - 1);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);

return true;
Expand Down
Loading