Skip to content

Commit

Permalink
[CALCITE-3094] Code of method grows beyond 64 KB when joining two tab…
Browse files Browse the repository at this point in the history
…les with many fields

Change code generation so that, when a join produces at least a threshold number of output
fields (currently 100), the output array is populated using System.arraycopy() instead of
an explicit array initializer that lists every element.
  • Loading branch information
jduo committed Jul 12, 2024
1 parent 374091b commit e7098f2
Show file tree
Hide file tree
Showing 5 changed files with 314 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.calcite.linq4j.Enumerable;
import org.apache.calcite.linq4j.Enumerator;
import org.apache.calcite.linq4j.JoinType;
import org.apache.calcite.linq4j.Nullness;
import org.apache.calcite.linq4j.Ord;
import org.apache.calcite.linq4j.function.Function1;
import org.apache.calcite.linq4j.function.Function2;
Expand All @@ -30,12 +31,14 @@
import org.apache.calcite.linq4j.tree.BlockStatement;
import org.apache.calcite.linq4j.tree.ConstantExpression;
import org.apache.calcite.linq4j.tree.ConstantUntypedNull;
import org.apache.calcite.linq4j.tree.DeclarationStatement;
import org.apache.calcite.linq4j.tree.Expression;
import org.apache.calcite.linq4j.tree.ExpressionType;
import org.apache.calcite.linq4j.tree.Expressions;
import org.apache.calcite.linq4j.tree.FunctionExpression;
import org.apache.calcite.linq4j.tree.MethodCallExpression;
import org.apache.calcite.linq4j.tree.MethodDeclaration;
import org.apache.calcite.linq4j.tree.NewArrayExpression;
import org.apache.calcite.linq4j.tree.ParameterExpression;
import org.apache.calcite.linq4j.tree.Primitive;
import org.apache.calcite.linq4j.tree.Types;
Expand All @@ -61,6 +64,7 @@

import org.checkerframework.checker.nullness.qual.Nullable;

import java.lang.reflect.Array;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.lang.reflect.Type;
Expand Down Expand Up @@ -160,6 +164,34 @@ static Expression joinSelector(JoinRelType joinType, PhysType physType,
// Generate all fields.
final List<Expression> expressions = new ArrayList<>();
final int outputFieldCount = physType.getRowType().getFieldCount();

// If there are many output fields, create the output dynamically so that the code size stays
// below the limit. See CALCITE-3094.
final boolean generateCompactCode = outputFieldCount >= 100;
final ParameterExpression compactOutputVar;
final BlockBuilder compactCode = new BlockBuilder();
if (generateCompactCode) {
Class<?> fieldClass = physType.fieldClass(0);
// If all fields have the same type, use the specific type. Otherwise just use Object.
for (int fieldIndex = 1; fieldIndex < outputFieldCount; ++fieldIndex) {
if (fieldClass != physType.fieldClass(fieldIndex)) {
fieldClass = Object.class;
break;
}
}

final Class<?> arrayClass = Array.newInstance(fieldClass, 0).getClass();
compactOutputVar = Expressions.variable(arrayClass, "outputArray");
final DeclarationStatement exp =
Expressions.declare(
0, compactOutputVar, new NewArrayExpression(fieldClass, 1,
Expressions.constant(outputFieldCount), null));
compactCode.add(exp);
} else {
compactOutputVar = null;
}

int outputField = 0;
for (Ord<PhysType> ord : Ord.zip(inputPhysTypes)) {
final PhysType inputPhysType =
ord.e.makeNullable(joinType.generatesNullsOn(ord.i));
Expand All @@ -175,6 +207,18 @@ static Expression joinSelector(JoinRelType joinType, PhysType physType,
break;
}
final int fieldCount = inputPhysType.getRowType().getFieldCount();
if (generateCompactCode) {
// use an array copy if possible
final Expression copyExpr =
Nullness.castNonNull(
inputPhysType.getFormat().copy(parameter, Nullness.castNonNull(compactOutputVar),
outputField, fieldCount));
compactCode.add(Expressions.statement(copyExpr));
outputField += fieldCount;
continue;
}

// otherwise access the fields individually
for (int i = 0; i < fieldCount; i++) {
Expression expression =
inputPhysType.fieldReference(parameter, i,
Expand All @@ -189,6 +233,34 @@ static Expression joinSelector(JoinRelType joinType, PhysType physType,
expressions.add(expression);
}
}

if (generateCompactCode) {
compactCode.add(Nullness.castNonNull(compactOutputVar));

// This expression generates code of the form:
// new org.apache.calcite.linq4j.function.Function2() {
// public String[] apply(org.apache.calcite.interpreter.Row left,
// org.apache.calcite.interpreter.Row right) {
// String[] outputArray = new String[left.length + right.length];
// System.arraycopy(left.copyValues(), 0, outputArray, 0, left.length);
// System.arraycopy(right.copyValues(), 0, outputArray, left.length, right.length);
// return outputArray;
// }
// public String[] apply(Object left, Object right) {
// return apply(
// (org.apache.calcite.interpreter.Row) left,
// (org.apache.calcite.interpreter.Row) right);
// }
// }
// That is, it converts the left and right Row objects to Object[] using Row#copyValues()
// then writes each to an output Object[] using System.arraycopy()

return Expressions.lambda(
Function2.class,
compactCode.toBlock(),
parameters);
}

return Expressions.lambda(
Function2.class,
physType.record(expressions),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.calcite.linq4j.tree.IndexExpression;
import org.apache.calcite.linq4j.tree.MemberExpression;
import org.apache.calcite.linq4j.tree.MethodCallExpression;
import org.apache.calcite.linq4j.tree.ParameterExpression;
import org.apache.calcite.linq4j.tree.Types;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.runtime.FlatLists;
Expand All @@ -35,6 +36,9 @@
import java.lang.reflect.Type;
import java.util.List;

import static org.apache.calcite.util.BuiltInMethod.ARRAY_COPY;
import static org.apache.calcite.util.BuiltInMethod.ROW_COPY_VALUES;

/**
* How a row is represented as a Java value.
*/
Expand Down Expand Up @@ -225,6 +229,11 @@ public enum JavaRowFormat {
}
return EnumUtils.convert(e, fromType, fieldType);
}

/**
 * Returns an expression that reads a field of a row when the field index is
 * only known at runtime.
 *
 * <p>Generates a call to {@code BuiltInMethod.ROW_VALUE} on {@code expression},
 * passing {@code field} as the index argument.
 *
 * @param expression expression that yields the row
 * @param field expression that yields the field index
 * @return method-call expression that reads the field
 */
@Override public Expression fieldDynamic(Expression expression, Expression field) {
  // Pass the index expression through unchanged. Wrapping it in
  // Expressions.constant() would embed the Expression object itself as a
  // literal rather than emitting the code that computes the index.
  return Expressions.call(expression, BuiltInMethod.ROW_VALUE.method, field);
}
},

ARRAY {
Expand Down Expand Up @@ -256,6 +265,23 @@ public enum JavaRowFormat {
}
return EnumUtils.convert(e, fromType, fieldType);
}

/**
 * Returns an expression that reads an element of an array-backed row when the
 * element index is supplied as an expression.
 *
 * @param expression expression that yields the array
 * @param field expression that yields the element index
 * @return array-index expression for the element
 */
@Override public Expression fieldDynamic(Expression expression, Expression field) {
  final IndexExpression element = Expressions.arrayIndex(expression, field);
  return element;
}

/**
 * Returns an expression that assigns {@code value} to an element of an
 * array-backed row when the element index is supplied as an expression.
 *
 * @param expression expression that yields the array
 * @param field expression that yields the element index
 * @param value expression that yields the value to store
 * @return assignment expression targeting the indexed element
 */
@Override public Expression setFieldDynamic(Expression expression, Expression field,
    Expression value) {
  // Use the index expression directly, as fieldDynamic does. Wrapping it in
  // Expressions.constant() would treat the Expression object as a literal
  // value instead of emitting the code that computes the index.
  final IndexExpression element = Expressions.arrayIndex(expression, field);
  return Expressions.assign(element, value);
}

/**
 * Builds a call to {@code ARRAY_COPY} that copies {@code length} elements from
 * the start of the array held by {@code parameter} into {@code outputArray},
 * beginning at {@code outputStartIndex}.
 *
 * @param parameter expression holding the source array
 * @param outputArray expression holding the destination array
 * @param outputStartIndex first destination index to write
 * @param length number of elements to copy
 * @return the copy call expression
 */
@Override public @Nullable Expression copy(ParameterExpression parameter,
    ParameterExpression outputArray, int outputStartIndex, int length) {
  final Expression sourceStart = Expressions.constant(0);
  final Expression targetStart = Expressions.constant(outputStartIndex);
  final Expression count = Expressions.constant(length);
  return Expressions.call(ARRAY_COPY.method, parameter, sourceStart,
      outputArray, targetStart, count);
}
};

public JavaRowFormat optimize(RelDataType rowType) {
Expand Down Expand Up @@ -301,4 +327,33 @@ public abstract Expression record(
*/
public abstract Expression field(Expression expression, int field,
@Nullable Type fromType, Type fieldType);

/**
 * Variant of {@link #field(Expression, int, Type, Type)} in which the field
 * index is an expression evaluated at runtime.
 *
 * <p>This default implementation is unsupported; row formats that can index
 * dynamically override it.
 *
 * @param expression expression that yields the row
 * @param field expression that yields the field index
 * @return expression that reads the field
 * @throws UnsupportedOperationException in this default implementation, with
 *     this format's string form as the message
 */
public Expression fieldDynamic(Expression expression, Expression field) {
  final String formatName = toString();
  throw new UnsupportedOperationException(formatName);
}

/**
 * Returns an expression that assigns a value to a field whose index is an
 * expression evaluated at runtime.
 *
 * <p>This default implementation is unsupported; row formats that can assign
 * dynamically override it.
 *
 * @param expression expression that yields the row
 * @param field expression that yields the field index
 * @param value expression that yields the value to store
 * @return expression that performs the assignment
 * @throws UnsupportedOperationException in this default implementation, with
 *     this format's string form as the message
 */
public Expression setFieldDynamic(Expression expression, Expression field, Expression value) {
  final String formatName = toString();
  throw new UnsupportedOperationException(formatName);
}

/**
 * Builds an expression that copies the fields of a row of this format into an
 * array.
 *
 * <p>This default implementation treats {@code parameter} as an
 * {@code org.apache.calcite.interpreter.Row}: it calls {@code ROW_COPY_VALUES}
 * on it to obtain an {@code Object[]}, then uses that array as the source of
 * an {@code ARRAY_COPY} call.
 *
 * @param parameter expression holding the source row
 * @param outputArray expression holding the destination array
 * @param outputStartIndex first destination index to write
 * @param length number of elements to copy
 * @return the copy call expression
 */
public @Nullable Expression copy(ParameterExpression parameter,
    ParameterExpression outputArray, int outputStartIndex, int length) {
  // Snapshot the Row's contents as an Object[] to serve as the copy source.
  final Expression rowValues =
      Expressions.call(Object[].class, parameter, ROW_COPY_VALUES.method);

  final Expression sourceStart = Expressions.constant(0);
  final Expression targetStart = Expressions.constant(outputStartIndex);
  final Expression count = Expressions.constant(length);

  // Copy from the snapshot into the requested slice of the output array.
  return Expressions.call(ARRAY_COPY.method, rowValues, sourceStart,
      outputArray, targetStart, count);
}
}
2 changes: 2 additions & 0 deletions core/src/main/java/org/apache/calcite/interpreter/Row.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ public static Row of(@Nullable Object...values) {
}

/**
 * Returns a copy of the values.
 *
 * <p>This method implements {@code BuiltInMethod.ROW_COPY_VALUES}, hence the
 * suppressed "unused" warning.
 */
@SuppressWarnings("unused")
public @Nullable Object[] copyValues() {
  final @Nullable Object[] snapshot = values.clone();
  return snapshot;
}
Expand Down
3 changes: 3 additions & 0 deletions core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ public enum BuiltInMethod {
JDBC_SCHEMA_DATA_SOURCE(JdbcSchema.class, "getDataSource"),
ROW_VALUE(Row.class, "getObject", int.class),
ROW_AS_COPY(Row.class, "asCopy", Object[].class),
ROW_COPY_VALUES(Row.class, "copyValues"), // This is an instance method that returns an Object[].
RESULT_SET_ENUMERABLE_SET_TIMEOUT(ResultSetEnumerable.class, "setTimeout",
DataContext.class),
RESULT_SET_ENUMERABLE_OF(ResultSetEnumerable.class, "of", DataSource.class,
Expand Down Expand Up @@ -272,6 +273,8 @@ public enum BuiltInMethod {
FUNCTION1_APPLY(Function1.class, "apply", Object.class),
ARRAYS_AS_LIST(Arrays.class, "asList", Object[].class),
ARRAY(SqlFunctions.class, "array", Object[].class),
ARRAY_COPY(System.class, "arraycopy", Object.class, int.class, Object.class, int.class,
int.class),
// class PairList.Helper is deprecated to discourage code from calling its
// methods directly, but use via Janino code generation is just fine.
@SuppressWarnings("deprecation")
Expand Down
Loading

0 comments on commit e7098f2

Please sign in to comment.