From 60cc9dfcb5bc83dfe36d1f844e837c38aa8887a3 Mon Sep 17 00:00:00 2001 From: Martin Traverso Date: Fri, 24 Jan 2025 09:22:36 -0800 Subject: [PATCH] Remove deprecated precomputed hash optimizer --- .../io/trino/SystemSessionProperties.java | 11 - .../operator/FlatHashStrategyCompiler.java | 2 +- .../operator/HashAggregationOperator.java | 2 +- .../java/io/trino/operator/HashGenerator.java | 2 + .../operator/InterpretedHashGenerator.java | 5 +- .../io/trino/sql/planner/OptimizerConfig.java | 14 +- .../io/trino/sql/planner/PlanOptimizers.java | 4 - .../HashGenerationOptimizer.java | 979 ------------------ .../io/trino/cost/TestOptimizerConfig.java | 3 - .../trino/sql/planner/TestLogicalPlanner.java | 76 -- .../TestPreAggregateCaseAggregations.java | 2 - .../sql/query/TestPrecomputedHashes.java | 84 -- .../plugin/jdbc/BaseJdbcConnectorTest.java | 10 +- .../mysql/TestMySqlLegacyConnectorTest.java | 9 +- .../AbstractTestEngineOnlyQueries.java | 2 - ...estDistributedQueriesNoHashGeneration.java | 32 - 16 files changed, 11 insertions(+), 1226 deletions(-) delete mode 100644 core/trino-main/src/main/java/io/trino/sql/planner/optimizations/HashGenerationOptimizer.java delete mode 100644 core/trino-main/src/test/java/io/trino/sql/query/TestPrecomputedHashes.java delete mode 100644 testing/trino-tests/src/test/java/io/trino/tests/TestDistributedQueriesNoHashGeneration.java diff --git a/core/trino-main/src/main/java/io/trino/SystemSessionProperties.java b/core/trino-main/src/main/java/io/trino/SystemSessionProperties.java index d6cd30d41b3b..4923b17ecc0c 100644 --- a/core/trino-main/src/main/java/io/trino/SystemSessionProperties.java +++ b/core/trino-main/src/main/java/io/trino/SystemSessionProperties.java @@ -60,7 +60,6 @@ public final class SystemSessionProperties implements SystemSessionPropertiesProvider { - public static final String OPTIMIZE_HASH_GENERATION = "optimize_hash_generation"; public static final String JOIN_DISTRIBUTION_TYPE = "join_distribution_type"; public static final String JOIN_MAX_BROADCAST_TABLE_SIZE = "join_max_broadcast_table_size"; public static final String JOIN_MULTI_CLAUSE_INDEPENDENCE_FACTOR = "join_multi_clause_independence_factor"; @@ -253,11 +252,6 @@ public SystemSessionProperties( "Policy used for scheduling query tasks", queryManagerConfig.getQueryExecutionPolicy(), false), - booleanProperty( - OPTIMIZE_HASH_GENERATION, - "Compute hash codes for distribution, joins, and aggregations early in query plan", - optimizerConfig.isOptimizeHashGeneration(), - false), enumProperty( JOIN_DISTRIBUTION_TYPE, "Join distribution type", @@ -1148,11 +1142,6 @@ public static String getExecutionPolicy(Session session) return session.getSystemProperty(EXECUTION_POLICY, String.class); } - public static boolean isOptimizeHashGenerationEnabled(Session session) - { - return session.getSystemProperty(OPTIMIZE_HASH_GENERATION, Boolean.class); - } - public static JoinDistributionType getJoinDistributionType(Session session) { return session.getSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.class); diff --git a/core/trino-main/src/main/java/io/trino/operator/FlatHashStrategyCompiler.java b/core/trino-main/src/main/java/io/trino/operator/FlatHashStrategyCompiler.java index 0d499f65d218..833efaf3fcbd 100644 --- a/core/trino-main/src/main/java/io/trino/operator/FlatHashStrategyCompiler.java +++ b/core/trino-main/src/main/java/io/trino/operator/FlatHashStrategyCompiler.java @@ -71,6 +71,7 @@ import static io.airlift.bytecode.expression.BytecodeExpressions.not; import static 
io.airlift.bytecode.expression.BytecodeExpressions.notEqual; import static io.trino.cache.SafeCaches.buildNonEvictableCache; +import static io.trino.operator.HashGenerator.INITIAL_HASH_VALUE; import static io.trino.spi.function.InvocationConvention.InvocationArgumentConvention.BLOCK_POSITION_NOT_NULL; import static io.trino.spi.function.InvocationConvention.InvocationArgumentConvention.FLAT; import static io.trino.spi.function.InvocationConvention.InvocationReturnConvention.BLOCK_BUILDER; @@ -81,7 +82,6 @@ import static io.trino.sql.gen.Bootstrap.BOOTSTRAP_METHOD; import static io.trino.sql.gen.BytecodeUtils.loadConstant; import static io.trino.sql.gen.SqlTypeBytecodeExpression.constantType; -import static io.trino.sql.planner.optimizations.HashGenerationOptimizer.INITIAL_HASH_VALUE; import static io.trino.util.CompilerUtils.defineClass; import static io.trino.util.CompilerUtils.makeClassName; diff --git a/core/trino-main/src/main/java/io/trino/operator/HashAggregationOperator.java b/core/trino-main/src/main/java/io/trino/operator/HashAggregationOperator.java index 964fa2f6d764..e1f3207b9c52 100644 --- a/core/trino-main/src/main/java/io/trino/operator/HashAggregationOperator.java +++ b/core/trino-main/src/main/java/io/trino/operator/HashAggregationOperator.java @@ -41,9 +41,9 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; import static io.airlift.units.DataSize.Unit.MEGABYTE; +import static io.trino.operator.HashGenerator.INITIAL_HASH_VALUE; import static io.trino.operator.aggregation.builder.InMemoryHashAggregationBuilder.toTypes; import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.sql.planner.optimizations.HashGenerationOptimizer.INITIAL_HASH_VALUE; import static io.trino.type.TypeUtils.NULL_HASH_CODE; import static java.util.Objects.requireNonNull; diff --git a/core/trino-main/src/main/java/io/trino/operator/HashGenerator.java b/core/trino-main/src/main/java/io/trino/operator/HashGenerator.java index 14b2d7b63648..7bc300457dfc 100644 --- a/core/trino-main/src/main/java/io/trino/operator/HashGenerator.java +++ b/core/trino-main/src/main/java/io/trino/operator/HashGenerator.java @@ -17,6 +17,8 @@ public interface HashGenerator { + int INITIAL_HASH_VALUE = 0; + long hashPosition(int position, Page page); default int getPartition(int partitionCount, int position, Page page) diff --git a/core/trino-main/src/main/java/io/trino/operator/InterpretedHashGenerator.java b/core/trino-main/src/main/java/io/trino/operator/InterpretedHashGenerator.java index 29590941c856..d0eba982db25 100644 --- a/core/trino-main/src/main/java/io/trino/operator/InterpretedHashGenerator.java +++ b/core/trino-main/src/main/java/io/trino/operator/InterpretedHashGenerator.java @@ -20,7 +20,6 @@ import io.trino.spi.block.Block; import io.trino.spi.type.Type; import io.trino.spi.type.TypeOperators; -import io.trino.sql.planner.optimizations.HashGenerationOptimizer; import jakarta.annotation.Nullable; import java.lang.invoke.MethodHandle; @@ -79,7 +78,7 @@ private InterpretedHashGenerator(List hashChannelTypes, @Nullable int[] ha public long hashPosition(int position, Page page) { // Note: this code is duplicated for performance but must logically match hashPosition(position, IntFunction blockProvider) - long result = HashGenerationOptimizer.INITIAL_HASH_VALUE; + long result = INITIAL_HASH_VALUE; for (int i = 0; i < hashCodeOperators.length; i++) { Block block = page.getBlock(hashChannels == null ? 
i : hashChannels[i]); result = CombineHashFunction.getHash(result, nullSafeHash(i, block, position)); @@ -90,7 +89,7 @@ public long hashPosition(int position, Page page) public long hashPosition(int position, IntFunction blockProvider) { // Note: this code is duplicated for performance but must logically match hashPosition(position, Page page) - long result = HashGenerationOptimizer.INITIAL_HASH_VALUE; + long result = INITIAL_HASH_VALUE; for (int i = 0; i < hashCodeOperators.length; i++) { Block block = blockProvider.apply(hashChannels == null ? i : hashChannels[i]); result = CombineHashFunction.getHash(result, nullSafeHash(i, block, position)); diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/OptimizerConfig.java b/core/trino-main/src/main/java/io/trino/sql/planner/OptimizerConfig.java index 7678c2a97779..9bcc34dc087b 100644 --- a/core/trino-main/src/main/java/io/trino/sql/planner/OptimizerConfig.java +++ b/core/trino-main/src/main/java/io/trino/sql/planner/OptimizerConfig.java @@ -35,6 +35,7 @@ "preferred-write-partitioning-min-number-of-partitions", "optimizer.use-mark-distinct", "optimizer.optimize-mixed-distinct-aggregations", + "optimizer.optimize-hash-generation", }) public class OptimizerConfig { @@ -67,7 +68,6 @@ public class OptimizerConfig private Duration iterativeOptimizerTimeout = new Duration(3, MINUTES); // by default let optimizer wait a long time in case it retrieves some data from ConnectorMetadata private boolean optimizeMetadataQueries; - private boolean optimizeHashGeneration; private boolean pushTableWriteThroughUnion = true; private boolean dictionaryAggregation; private MarkDistinctStrategy markDistinctStrategy; @@ -544,18 +544,6 @@ public OptimizerConfig setOptimizeTopNRanking(boolean optimizeTopNRanking) return this; } - public boolean isOptimizeHashGeneration() - { - return optimizeHashGeneration; - } - - @Config("optimizer.optimize-hash-generation") - public OptimizerConfig setOptimizeHashGeneration(boolean optimizeHashGeneration) - { - this.optimizeHashGeneration = optimizeHashGeneration; - return this; - } - public boolean isPushTableWriteThroughUnion() { return pushTableWriteThroughUnion; diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/PlanOptimizers.java b/core/trino-main/src/main/java/io/trino/sql/planner/PlanOptimizers.java index 22bc822a1402..435090952dfa 100644 --- a/core/trino-main/src/main/java/io/trino/sql/planner/PlanOptimizers.java +++ b/core/trino-main/src/main/java/io/trino/sql/planner/PlanOptimizers.java @@ -253,7 +253,6 @@ import io.trino.sql.planner.optimizations.BeginTableWrite; import io.trino.sql.planner.optimizations.CheckSubqueryNodesAreRewritten; import io.trino.sql.planner.optimizations.DeterminePartitionCount; -import io.trino.sql.planner.optimizations.HashGenerationOptimizer; import io.trino.sql.planner.optimizations.IndexJoinOptimizer; import io.trino.sql.planner.optimizations.LimitPushDown; import io.trino.sql.planner.optimizations.MetadataQueryOptimizer; @@ -1023,9 +1022,6 @@ public PlanOptimizers( new RemoveRedundantIdentityProjections()))); // DO NOT add optimizers that change the plan shape (computations) after this point - // Precomputed hashes - this assumes that partitioning will not change - builder.add(new HashGenerationOptimizer(metadata)); - builder.add(new IterativeOptimizer( plannerContext, ruleStats, diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/optimizations/HashGenerationOptimizer.java 
b/core/trino-main/src/main/java/io/trino/sql/planner/optimizations/HashGenerationOptimizer.java deleted file mode 100644 index 5c8a4c84c074..000000000000 --- a/core/trino-main/src/main/java/io/trino/sql/planner/optimizations/HashGenerationOptimizer.java +++ /dev/null @@ -1,979 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.sql.planner.optimizations; - -import com.google.common.collect.BiMap; -import com.google.common.collect.ImmutableBiMap; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableListMultimap; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.ImmutableSetMultimap; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.collect.Multimap; -import io.trino.SystemSessionProperties; -import io.trino.metadata.Metadata; -import io.trino.spi.function.OperatorType; -import io.trino.sql.ir.Call; -import io.trino.sql.ir.Coalesce; -import io.trino.sql.ir.Constant; -import io.trino.sql.ir.Expression; -import io.trino.sql.ir.Reference; -import io.trino.sql.planner.BuiltinFunctionCallBuilder; -import io.trino.sql.planner.Partitioning.ArgumentBinding; -import io.trino.sql.planner.PartitioningHandle; -import io.trino.sql.planner.PartitioningScheme; -import io.trino.sql.planner.PlanNodeIdAllocator; -import io.trino.sql.planner.Symbol; -import io.trino.sql.planner.SymbolAllocator; -import io.trino.sql.planner.plan.AggregationNode; -import io.trino.sql.planner.plan.ApplyNode; -import io.trino.sql.planner.plan.Assignments; -import io.trino.sql.planner.plan.CorrelatedJoinNode; -import io.trino.sql.planner.plan.DistinctLimitNode; -import io.trino.sql.planner.plan.EnforceSingleRowNode; -import io.trino.sql.planner.plan.ExchangeNode; -import io.trino.sql.planner.plan.GroupIdNode; -import io.trino.sql.planner.plan.IndexJoinNode; -import io.trino.sql.planner.plan.IndexJoinNode.EquiJoinClause; -import io.trino.sql.planner.plan.JoinNode; -import io.trino.sql.planner.plan.MarkDistinctNode; -import io.trino.sql.planner.plan.PlanNode; -import io.trino.sql.planner.plan.PlanVisitor; -import io.trino.sql.planner.plan.ProjectNode; -import io.trino.sql.planner.plan.RowNumberNode; -import io.trino.sql.planner.plan.SemiJoinNode; -import io.trino.sql.planner.plan.SpatialJoinNode; -import io.trino.sql.planner.plan.TopNRankingNode; -import io.trino.sql.planner.plan.UnionNode; -import io.trino.sql.planner.plan.UnnestNode; -import io.trino.sql.planner.plan.WindowNode; - -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; -import java.util.function.Function; - -import static com.google.common.base.MoreObjects.toStringHelper; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkState; 
-import static com.google.common.base.Verify.verify; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static io.trino.metadata.OperatorNameUtil.mangleOperatorName; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.sql.planner.SystemPartitioningHandle.FIXED_HASH_DISTRIBUTION; -import static io.trino.sql.planner.SystemPartitioningHandle.SCALED_WRITER_HASH_DISTRIBUTION; -import static io.trino.sql.planner.plan.ChildReplacer.replaceChildren; -import static io.trino.sql.planner.plan.JoinType.INNER; -import static io.trino.sql.planner.plan.JoinType.LEFT; -import static io.trino.sql.planner.plan.JoinType.RIGHT; -import static io.trino.type.TypeUtils.NULL_HASH_CODE; -import static java.util.Objects.requireNonNull; - -public class HashGenerationOptimizer - implements PlanOptimizer -{ - public static final int INITIAL_HASH_VALUE = 0; - private static final String HASH_CODE = mangleOperatorName(OperatorType.HASH_CODE); - - private final Metadata metadata; - - public HashGenerationOptimizer(Metadata metadata) - { - this.metadata = requireNonNull(metadata, "metadata is null"); - } - - @Override - public PlanNode optimize(PlanNode plan, Context context) - { - requireNonNull(plan, "plan is null"); - if (SystemSessionProperties.isOptimizeHashGenerationEnabled(context.session())) { - PlanWithProperties result = plan.accept(new Rewriter(metadata, context.idAllocator(), context.symbolAllocator()), new HashComputationSet()); - return result.getNode(); - } - return plan; - } - - private static class Rewriter - extends PlanVisitor - { - private final Metadata metadata; - private final PlanNodeIdAllocator idAllocator; - private final SymbolAllocator symbolAllocator; - - private Rewriter(Metadata metadata, PlanNodeIdAllocator idAllocator, SymbolAllocator symbolAllocator) - { - this.metadata = requireNonNull(metadata, "metadata is null"); - this.idAllocator = requireNonNull(idAllocator, "idAllocator is null"); - this.symbolAllocator = requireNonNull(symbolAllocator, "symbolAllocator is null"); - } - - @Override - protected PlanWithProperties visitPlan(PlanNode node, HashComputationSet parentPreference) - { - return planSimpleNodeWithProperties(node, parentPreference); - } - - @Override - public PlanWithProperties visitEnforceSingleRow(EnforceSingleRowNode node, HashComputationSet parentPreference) - { - // this plan node can only have a single input symbol, so do not add extra hash symbols - return planSimpleNodeWithProperties(node, new HashComputationSet(), true); - } - - @Override - public PlanWithProperties visitApply(ApplyNode node, HashComputationSet context) - { - // Apply node is not supported by execution, so do not rewrite it - // that way query will fail in sanity checkers - return new PlanWithProperties(node, ImmutableMap.of()); - } - - @Override - public PlanWithProperties visitCorrelatedJoin(CorrelatedJoinNode node, HashComputationSet context) - { - // Correlated join node is not supported by execution, so do not rewrite it - // that way query will fail in sanity checkers - return new PlanWithProperties(node, ImmutableMap.of()); - } - - @Override - public PlanWithProperties visitAggregation(AggregationNode node, HashComputationSet parentPreference) - { - Optional groupByHash = Optional.empty(); - if (!node.isStreamable() && !canSkipHashGeneration(node.getGroupingKeys())) { - groupByHash = computeHash(node.getGroupingKeys()); - } - - // aggregation does not pass through 
preferred hash symbols - HashComputationSet requiredHashes = new HashComputationSet(groupByHash); - PlanWithProperties child = planAndEnforce(node.getSource(), requiredHashes, false, requiredHashes); - - Optional hashSymbol = groupByHash.map(child::getRequiredHashSymbol); - - return new PlanWithProperties( - AggregationNode.builderFrom(node) - .setSource(child.getNode()) - .setHashSymbol(hashSymbol) - .build(), - hashSymbol.isPresent() ? ImmutableMap.of(groupByHash.get(), hashSymbol.get()) : ImmutableMap.of()); - } - - private boolean canSkipHashGeneration(List partitionSymbols) - { - // HACK: bigint grouped aggregation has special operators that do not use precomputed hash, so we can skip hash generation - return partitionSymbols.isEmpty() || partitionSymbols.size() == 1 && Iterables.getOnlyElement(partitionSymbols).type().equals(BIGINT); - } - - @Override - public PlanWithProperties visitGroupId(GroupIdNode node, HashComputationSet parentPreference) - { - // remove any hash symbols not exported by the source of this node - return planSimpleNodeWithProperties(node, parentPreference.pruneSymbols(node.getSource().getOutputSymbols())); - } - - @Override - public PlanWithProperties visitDistinctLimit(DistinctLimitNode node, HashComputationSet parentPreference) - { - // skip hash symbol generation for single bigint - if (canSkipHashGeneration(node.getDistinctSymbols())) { - return planSimpleNodeWithProperties(node, parentPreference); - } - - Optional hashComputation = computeHash(node.getDistinctSymbols()); - PlanWithProperties child = planAndEnforce( - node.getSource(), - new HashComputationSet(hashComputation), - false, - parentPreference.withHashComputation(node, hashComputation)); - Symbol hashSymbol = child.getRequiredHashSymbol(hashComputation.get()); - - // TODO: we need to reason about how pre-computed hashes from child relate to distinct symbols. We should be able to include any precomputed hash - // that's functionally dependent on the distinct field in the set of distinct fields of the new node to be able to propagate it downstream. - // Currently, such precomputed hashes will be dropped by this operation. 
- return new PlanWithProperties( - new DistinctLimitNode(node.getId(), child.getNode(), node.getLimit(), node.isPartial(), node.getDistinctSymbols(), Optional.of(hashSymbol)), - ImmutableMap.of(hashComputation.get(), hashSymbol)); - } - - @Override - public PlanWithProperties visitMarkDistinct(MarkDistinctNode node, HashComputationSet parentPreference) - { - // skip hash symbol generation for single bigint - if (canSkipHashGeneration(node.getDistinctSymbols())) { - return planSimpleNodeWithProperties(node, parentPreference, false); - } - - Optional hashComputation = computeHash(node.getDistinctSymbols()); - PlanWithProperties child = planAndEnforce( - node.getSource(), - new HashComputationSet(hashComputation), - false, - parentPreference.withHashComputation(node, hashComputation)); - Symbol hashSymbol = child.getRequiredHashSymbol(hashComputation.get()); - - return new PlanWithProperties( - new MarkDistinctNode(node.getId(), child.getNode(), node.getMarkerSymbol(), node.getDistinctSymbols(), Optional.of(hashSymbol)), - child.getHashSymbols()); - } - - @Override - public PlanWithProperties visitRowNumber(RowNumberNode node, HashComputationSet parentPreference) - { - if (node.getPartitionBy().isEmpty()) { - return planSimpleNodeWithProperties(node, parentPreference); - } - - Optional hashComputation = computeHash(node.getPartitionBy()); - PlanWithProperties child = planAndEnforce( - node.getSource(), - new HashComputationSet(hashComputation), - false, - parentPreference.withHashComputation(node, hashComputation)); - Symbol hashSymbol = child.getRequiredHashSymbol(hashComputation.get()); - - return new PlanWithProperties( - new RowNumberNode( - node.getId(), - child.getNode(), - node.getPartitionBy(), - node.isOrderSensitive(), - node.getRowNumberSymbol(), - node.getMaxRowCountPerPartition(), - Optional.of(hashSymbol)), - child.getHashSymbols()); - } - - @Override - public PlanWithProperties visitTopNRanking(TopNRankingNode node, HashComputationSet parentPreference) - { - if (node.getPartitionBy().isEmpty()) { - return planSimpleNodeWithProperties(node, parentPreference); - } - - Optional hashComputation = computeHash(node.getPartitionBy()); - PlanWithProperties child = planAndEnforce( - node.getSource(), - new HashComputationSet(hashComputation), - false, - parentPreference.withHashComputation(node, hashComputation)); - Symbol hashSymbol = child.getRequiredHashSymbol(hashComputation.get()); - - return new PlanWithProperties( - new TopNRankingNode( - node.getId(), - child.getNode(), - node.getSpecification(), - node.getRankingType(), - node.getRankingSymbol(), - node.getMaxRankingPerPartition(), - node.isPartial(), - Optional.of(hashSymbol)), - child.getHashSymbols()); - } - - @Override - public PlanWithProperties visitJoin(JoinNode node, HashComputationSet parentPreference) - { - List clauses = node.getCriteria(); - if (clauses.isEmpty()) { - // join does not pass through preferred hash symbols since they take more memory and since - // the join node filters, may take more compute - PlanWithProperties left = planAndEnforce(node.getLeft(), new HashComputationSet(), true, new HashComputationSet()); - PlanWithProperties right = planAndEnforce(node.getRight(), new HashComputationSet(), true, new HashComputationSet()); - checkState(left.getHashSymbols().isEmpty() && right.getHashSymbols().isEmpty()); - return new PlanWithProperties( - replaceChildren(node, ImmutableList.of(left.getNode(), right.getNode())), - ImmutableMap.of()); - } - - // join does not pass through preferred hash symbols since 
they take more memory and since - // the join node filters, may take more compute - Optional leftHashComputation = computeHash(Lists.transform(clauses, JoinNode.EquiJoinClause::getLeft)); - PlanWithProperties left = planAndEnforce(node.getLeft(), new HashComputationSet(leftHashComputation), true, new HashComputationSet(leftHashComputation)); - Symbol leftHashSymbol = left.getRequiredHashSymbol(leftHashComputation.get()); - - Optional rightHashComputation = computeHash(Lists.transform(clauses, JoinNode.EquiJoinClause::getRight)); - // drop undesired hash symbols from build to save memory - PlanWithProperties right = planAndEnforce(node.getRight(), new HashComputationSet(rightHashComputation), true, new HashComputationSet(rightHashComputation)); - Symbol rightHashSymbol = right.getRequiredHashSymbol(rightHashComputation.get()); - - // build map of all hash symbols - // NOTE: Full outer join doesn't use hash symbols - Map allHashSymbols = new HashMap<>(); - if (node.getType() == INNER || node.getType() == LEFT) { - allHashSymbols.putAll(left.getHashSymbols()); - } - if (node.getType() == INNER || node.getType() == RIGHT) { - allHashSymbols.putAll(right.getHashSymbols()); - } - - return buildJoinNodeWithPreferredHashes(node, left, right, allHashSymbols, parentPreference, Optional.of(leftHashSymbol), Optional.of(rightHashSymbol)); - } - - private PlanWithProperties buildJoinNodeWithPreferredHashes( - JoinNode node, - PlanWithProperties left, - PlanWithProperties right, - Map allHashSymbols, - HashComputationSet parentPreference, - Optional leftHashSymbol, - Optional rightHashSymbol) - { - // retain only hash symbols preferred by parent nodes - Map hashSymbolsWithParentPreferences = - allHashSymbols.entrySet() - .stream() - .filter(entry -> parentPreference.getHashes().contains(entry.getKey())) - .collect(toImmutableMap(Entry::getKey, Entry::getValue)); - Set preferredHashSymbols = ImmutableSet.copyOf(hashSymbolsWithParentPreferences.values()); - Set leftOutputSymbols = ImmutableSet.copyOf(node.getLeftOutputSymbols()); - Set rightOutputSymbols = ImmutableSet.copyOf(node.getRightOutputSymbols()); - - List newLeftOutputSymbols = left.getNode().getOutputSymbols().stream() - .filter(symbol -> leftOutputSymbols.contains(symbol) || preferredHashSymbols.contains(symbol)) - .collect(toImmutableList()); - List newRightOutputSymbols = right.getNode().getOutputSymbols().stream() - .filter(symbol -> rightOutputSymbols.contains(symbol) || preferredHashSymbols.contains(symbol)) - .collect(toImmutableList()); - - return new PlanWithProperties( - new JoinNode( - node.getId(), - node.getType(), - left.getNode(), - right.getNode(), - node.getCriteria(), - newLeftOutputSymbols, - newRightOutputSymbols, - node.isMaySkipOutputDuplicates(), - node.getFilter(), - leftHashSymbol, - rightHashSymbol, - node.getDistributionType(), - node.isSpillable(), - node.getDynamicFilters(), - node.getReorderJoinStatsAndCost()), - hashSymbolsWithParentPreferences); - } - - @Override - public PlanWithProperties visitSemiJoin(SemiJoinNode node, HashComputationSet parentPreference) - { - Optional sourceHashComputation = computeHash(ImmutableList.of(node.getSourceJoinSymbol())); - PlanWithProperties source = planAndEnforce( - node.getSource(), - new HashComputationSet(sourceHashComputation), - true, - new HashComputationSet(sourceHashComputation)); - Symbol sourceHashSymbol = source.getRequiredHashSymbol(sourceHashComputation.get()); - - Optional filterHashComputation = computeHash(ImmutableList.of(node.getFilteringSourceJoinSymbol())); 
- HashComputationSet requiredHashes = new HashComputationSet(filterHashComputation); - PlanWithProperties filteringSource = planAndEnforce(node.getFilteringSource(), requiredHashes, true, requiredHashes); - Symbol filteringSourceHashSymbol = filteringSource.getRequiredHashSymbol(filterHashComputation.get()); - - return new PlanWithProperties( - new SemiJoinNode( - node.getId(), - source.getNode(), - filteringSource.getNode(), - node.getSourceJoinSymbol(), - node.getFilteringSourceJoinSymbol(), - node.getSemiJoinOutput(), - Optional.of(sourceHashSymbol), - Optional.of(filteringSourceHashSymbol), - node.getDistributionType(), - node.getDynamicFilterId()), - source.getHashSymbols()); - } - - @Override - public PlanWithProperties visitSpatialJoin(SpatialJoinNode node, HashComputationSet parentPreference) - { - PlanWithProperties left = planAndEnforce(node.getLeft(), new HashComputationSet(), true, new HashComputationSet()); - PlanWithProperties right = planAndEnforce(node.getRight(), new HashComputationSet(), true, new HashComputationSet()); - verify(left.getHashSymbols().isEmpty(), "probe side of the spatial join should not include hash symbols"); - verify(right.getHashSymbols().isEmpty(), "build side of the spatial join should not include hash symbols"); - return new PlanWithProperties( - replaceChildren(node, ImmutableList.of(left.getNode(), right.getNode())), - ImmutableMap.of()); - } - - @Override - public PlanWithProperties visitIndexJoin(IndexJoinNode node, HashComputationSet parentPreference) - { - List clauses = node.getCriteria(); - - // join does not pass through preferred hash symbols since they take more memory and since - // the join node filters, may take more compute - Optional probeHashComputation = computeHash(Lists.transform(clauses, IndexJoinNode.EquiJoinClause::getProbe)); - PlanWithProperties probe = planAndEnforce( - node.getProbeSource(), - new HashComputationSet(probeHashComputation), - true, - new HashComputationSet(probeHashComputation)); - Symbol probeHashSymbol = probe.getRequiredHashSymbol(probeHashComputation.get()); - - Optional indexHashComputation = computeHash(Lists.transform(clauses, EquiJoinClause::getIndex)); - HashComputationSet requiredHashes = new HashComputationSet(indexHashComputation); - PlanWithProperties index = planAndEnforce(node.getIndexSource(), requiredHashes, true, requiredHashes); - Symbol indexHashSymbol = index.getRequiredHashSymbol(indexHashComputation.get()); - - // build map of all hash symbols - Map allHashSymbols = new HashMap<>(); - if (node.getType() == IndexJoinNode.Type.INNER) { - allHashSymbols.putAll(probe.getHashSymbols()); - } - allHashSymbols.putAll(index.getHashSymbols()); - - return new PlanWithProperties( - new IndexJoinNode( - node.getId(), - node.getType(), - probe.getNode(), - index.getNode(), - node.getCriteria(), - Optional.of(probeHashSymbol), - Optional.of(indexHashSymbol)), - allHashSymbols); - } - - @Override - public PlanWithProperties visitWindow(WindowNode node, HashComputationSet parentPreference) - { - if (node.getPartitionBy().isEmpty()) { - return planSimpleNodeWithProperties(node, parentPreference, true); - } - - Optional hashComputation = computeHash(node.getPartitionBy()); - PlanWithProperties child = planAndEnforce( - node.getSource(), - new HashComputationSet(hashComputation), - true, - parentPreference.withHashComputation(node, hashComputation)); - - Symbol hashSymbol = child.getRequiredHashSymbol(hashComputation.get()); - - return new PlanWithProperties( - new WindowNode( - node.getId(), - 
child.getNode(), - node.getSpecification(), - node.getWindowFunctions(), - Optional.of(hashSymbol), - node.getPrePartitionedInputs(), - node.getPreSortedOrderPrefix()), - child.getHashSymbols()); - } - - @Override - public PlanWithProperties visitExchange(ExchangeNode node, HashComputationSet parentPreference) - { - // remove any hash symbols not exported by this node - HashComputationSet preference = parentPreference.pruneSymbols(node.getOutputSymbols()); - - // Currently, precomputed hash values are only supported for system hash distributions without constants - Optional partitionSymbols = Optional.empty(); - PartitioningScheme partitioningScheme = node.getPartitioningScheme(); - PartitioningHandle partitioningHandle = partitioningScheme.getPartitioning().getHandle(); - - if ((partitioningHandle.equals(FIXED_HASH_DISTRIBUTION) - || partitioningHandle.equals(SCALED_WRITER_HASH_DISTRIBUTION)) - && partitioningScheme.getPartitioning().getArguments().stream().allMatch(ArgumentBinding::isVariable)) { - // add precomputed hash for exchange - partitionSymbols = computeHash(partitioningScheme.getPartitioning().getArguments().stream() - .map(ArgumentBinding::getColumn) - .collect(toImmutableList())); - preference = preference.withHashComputation(partitionSymbols); - } - - // establish fixed ordering for hash symbols - List hashSymbolOrder = ImmutableList.copyOf(preference.getHashes()); - Map newHashSymbols = new HashMap<>(); - for (HashComputation preferredHashSymbol : hashSymbolOrder) { - newHashSymbols.put(preferredHashSymbol, symbolAllocator.newSymbol("$hashValue", BIGINT)); - } - - // rewrite partition function to include new symbols (and precomputed hash) - partitioningScheme = new PartitioningScheme( - partitioningScheme.getPartitioning(), - ImmutableList.builder() - .addAll(partitioningScheme.getOutputLayout()) - .addAll(hashSymbolOrder.stream() - .map(newHashSymbols::get) - .collect(toImmutableList())) - .build(), - partitionSymbols.map(newHashSymbols::get), - partitioningScheme.isReplicateNullsAndAny(), - partitioningScheme.getBucketToPartition(), - partitioningScheme.getPartitionCount()); - - // add hash symbols to sources - ImmutableList.Builder> newInputs = ImmutableList.builder(); - ImmutableList.Builder newSources = ImmutableList.builder(); - for (int sourceId = 0; sourceId < node.getSources().size(); sourceId++) { - PlanNode source = node.getSources().get(sourceId); - List inputSymbols = node.getInputs().get(sourceId); - - Map outputToInputMap = new HashMap<>(); - for (int symbolId = 0; symbolId < inputSymbols.size(); symbolId++) { - outputToInputMap.put(node.getOutputSymbols().get(symbolId), inputSymbols.get(symbolId)); - } - Function> outputToInputTranslator = symbol -> Optional.of(outputToInputMap.get(symbol)); - - HashComputationSet sourceContext = preference.translate(outputToInputTranslator); - PlanWithProperties child = planAndEnforce(source, sourceContext, true, sourceContext); - newSources.add(child.getNode()); - - // add hash symbols to inputs in the required order - ImmutableList.Builder newInputSymbols = ImmutableList.builder(); - newInputSymbols.addAll(node.getInputs().get(sourceId)); - for (HashComputation preferredHashSymbol : hashSymbolOrder) { - HashComputation hashComputation = preferredHashSymbol.translate(outputToInputTranslator).get(); - newInputSymbols.add(child.getRequiredHashSymbol(hashComputation)); - } - - newInputs.add(newInputSymbols.build()); - } - - return new PlanWithProperties( - new ExchangeNode( - node.getId(), - node.getType(), - node.getScope(), 
- partitioningScheme, - newSources.build(), - newInputs.build(), - node.getOrderingScheme()), - newHashSymbols); - } - - @Override - public PlanWithProperties visitUnion(UnionNode node, HashComputationSet parentPreference) - { - // remove any hash symbols not exported by this node - HashComputationSet preference = parentPreference.pruneSymbols(node.getOutputSymbols()); - - // create new hash symbols - Map newHashSymbols = new HashMap<>(); - for (HashComputation preferredHashSymbol : preference.getHashes()) { - newHashSymbols.put(preferredHashSymbol, symbolAllocator.newSymbol("$hashValue", BIGINT)); - } - - // add hash symbols to sources - ImmutableListMultimap.Builder newSymbolMapping = ImmutableListMultimap.builder(); - newSymbolMapping.putAll(node.getSymbolMapping()); - ImmutableList.Builder newSources = ImmutableList.builder(); - for (int sourceId = 0; sourceId < node.getSources().size(); sourceId++) { - // translate preference to input symbols - Map outputToInputMap = new HashMap<>(); - for (Symbol outputSymbol : node.getOutputSymbols()) { - outputToInputMap.put(outputSymbol, node.getSymbolMapping().get(outputSymbol).get(sourceId)); - } - Function> outputToInputTranslator = symbol -> Optional.of(outputToInputMap.get(symbol)); - - HashComputationSet sourcePreference = preference.translate(outputToInputTranslator); - PlanWithProperties child = planAndEnforce(node.getSources().get(sourceId), sourcePreference, true, sourcePreference); - newSources.add(child.getNode()); - - // add hash symbols to inputs - for (Entry entry : newHashSymbols.entrySet()) { - HashComputation hashComputation = entry.getKey().translate(outputToInputTranslator).get(); - newSymbolMapping.put(entry.getValue(), child.getRequiredHashSymbol(hashComputation)); - } - } - - return new PlanWithProperties( - new UnionNode( - node.getId(), - newSources.build(), - newSymbolMapping.build(), - ImmutableList.copyOf(newSymbolMapping.build().keySet())), - newHashSymbols); - } - - @Override - public PlanWithProperties visitProject(ProjectNode node, HashComputationSet parentPreference) - { - Map outputToInputMapping = computeIdentityTranslations(node.getAssignments().getMap()); - HashComputationSet sourceContext = parentPreference.translate(symbol -> Optional.ofNullable(outputToInputMapping.get(symbol))); - PlanWithProperties child = plan(node.getSource(), sourceContext); - - // create a new project node with all assignments from the original node - Assignments.Builder newAssignments = Assignments.builder(); - newAssignments.putAll(node.getAssignments()); - - // and all hash symbols that could be translated to the source symbols - Map allHashSymbols = new HashMap<>(); - for (HashComputation hashComputation : sourceContext.getHashes()) { - Symbol hashSymbol = child.getHashSymbols().get(hashComputation); - Expression hashExpression; - if (hashSymbol == null) { - hashSymbol = symbolAllocator.newSymbol("$hashValue", BIGINT); - hashExpression = hashComputation.getHashExpression(metadata); - } - else { - hashExpression = hashSymbol.toSymbolReference(); - } - newAssignments.put(hashSymbol, hashExpression); - for (HashComputation sourceHashComputation : sourceContext.lookup(hashComputation)) { - allHashSymbols.put(sourceHashComputation, hashSymbol); - } - } - - return new PlanWithProperties(new ProjectNode(node.getId(), child.getNode(), newAssignments.build()), allHashSymbols); - } - - @Override - public PlanWithProperties visitUnnest(UnnestNode node, HashComputationSet parentPreference) - { - PlanWithProperties child = 
plan(node.getSource(), parentPreference.pruneSymbols(node.getSource().getOutputSymbols())); - - // only pass through hash symbols requested by the parent - Map hashSymbols = new HashMap<>(child.getHashSymbols()); - hashSymbols.keySet().retainAll(parentPreference.getHashes()); - - return new PlanWithProperties( - new UnnestNode( - node.getId(), - child.getNode(), - ImmutableList.builder() - .addAll(node.getReplicateSymbols()) - .addAll(hashSymbols.values()) - .build(), - node.getMappings(), - node.getOrdinalitySymbol(), - node.getJoinType()), - hashSymbols); - } - - private PlanWithProperties planSimpleNodeWithProperties(PlanNode node, HashComputationSet preferredHashes) - { - return planSimpleNodeWithProperties(node, preferredHashes, true); - } - - private PlanWithProperties planSimpleNodeWithProperties( - PlanNode node, - HashComputationSet preferredHashes, - boolean alwaysPruneExtraHashSymbols) - { - if (node.getSources().isEmpty()) { - return new PlanWithProperties(node, ImmutableMap.of()); - } - - // There is no requirement to produce hash symbols and only preference for symbols - PlanWithProperties source = planAndEnforce(Iterables.getOnlyElement(node.getSources()), new HashComputationSet(), alwaysPruneExtraHashSymbols, preferredHashes); - PlanNode result = replaceChildren(node, ImmutableList.of(source.getNode())); - - // return only hash symbols that are passed through the new node - Map hashSymbols = new HashMap<>(source.getHashSymbols()); - hashSymbols.values().retainAll(result.getOutputSymbols()); - - return new PlanWithProperties(result, hashSymbols); - } - - private PlanWithProperties planAndEnforce( - PlanNode node, - HashComputationSet requiredHashes, - boolean pruneExtraHashSymbols, - HashComputationSet preferredHashes) - { - PlanWithProperties result = plan(node, preferredHashes); - - boolean preferenceSatisfied; - if (pruneExtraHashSymbols) { - // Make sure that - // (1) result has all required hashes - // (2) any extra hashes are preferred hashes (e.g. 
no pruning is needed) - Set resultHashes = result.getHashSymbols().keySet(); - Set requiredAndPreferredHashes = ImmutableSet.builder() - .addAll(requiredHashes.getHashes()) - .addAll(preferredHashes.getHashes()) - .build(); - preferenceSatisfied = resultHashes.containsAll(requiredHashes.getHashes()) && - requiredAndPreferredHashes.containsAll(resultHashes); - } - else { - preferenceSatisfied = result.getHashSymbols().keySet().containsAll(requiredHashes.getHashes()); - } - - if (preferenceSatisfied) { - return result; - } - - return enforce(result, requiredHashes); - } - - private PlanWithProperties enforce(PlanWithProperties planWithProperties, HashComputationSet requiredHashes) - { - Assignments.Builder assignments = Assignments.builder(); - - Map outputHashSymbols = new HashMap<>(); - - // copy through all symbols from child, except for hash symbols not needed by the parent - Map resultHashSymbols = planWithProperties.getHashSymbols().inverse(); - for (Symbol symbol : planWithProperties.getNode().getOutputSymbols()) { - HashComputation partitionSymbols = resultHashSymbols.get(symbol); - if (partitionSymbols == null || requiredHashes.getHashes().contains(partitionSymbols)) { - assignments.putIdentity(symbol); - - if (partitionSymbols != null) { - outputHashSymbols.put(partitionSymbols, symbol); - } - } - } - - // add new projections for hash symbols needed by the parent - for (HashComputation hashComputation : requiredHashes.getHashes()) { - if (!planWithProperties.getHashSymbols().containsKey(hashComputation)) { - Expression hashExpression = hashComputation.getHashExpression(metadata); - Symbol hashSymbol = symbolAllocator.newSymbol("$hashValue", BIGINT); - assignments.put(hashSymbol, hashExpression); - outputHashSymbols.put(hashComputation, hashSymbol); - } - } - - ProjectNode projectNode = new ProjectNode(idAllocator.getNextId(), planWithProperties.getNode(), assignments.build()); - return new PlanWithProperties(projectNode, outputHashSymbols); - } - - private PlanWithProperties plan(PlanNode node, HashComputationSet parentPreference) - { - PlanWithProperties result = node.accept(this, parentPreference); - checkState( - result.getNode().getOutputSymbols().containsAll(result.getHashSymbols().values()), - "Node %s declares hash symbols not in the output", - result.getNode().getClass().getSimpleName()); - return result; - } - } - - private static class HashComputationSet - { - private final Multimap hashes; - - public HashComputationSet() - { - hashes = ImmutableSetMultimap.of(); - } - - public HashComputationSet(Optional hash) - { - requireNonNull(hash, "hash is null"); - if (hash.isPresent()) { - this.hashes = ImmutableSetMultimap.of(hash.get(), hash.get()); - } - else { - this.hashes = ImmutableSetMultimap.of(); - } - } - - private HashComputationSet(Multimap hashes) - { - requireNonNull(hashes, "hashes is null"); - this.hashes = ImmutableSetMultimap.copyOf(hashes); - } - - public Set getHashes() - { - return hashes.keySet(); - } - - public HashComputationSet pruneSymbols(List symbols) - { - Set uniqueSymbols = ImmutableSet.copyOf(symbols); - ImmutableSetMultimap.Builder builder = ImmutableSetMultimap.builder(); - - hashes.keySet().stream() - .filter(hash -> hash.canComputeWith(uniqueSymbols)) - .forEach(hash -> builder.putAll(hash, hashes.get(hash))); - - return new HashComputationSet(builder.build()); - } - - public HashComputationSet translate(Function> translator) - { - ImmutableSetMultimap.Builder builder = ImmutableSetMultimap.builder(); - for (HashComputation hashComputation : 
hashes.keySet()) { - hashComputation.translate(translator) - .ifPresent(hash -> builder.put(hash, hashComputation)); - } - return new HashComputationSet(builder.build()); - } - - public Collection lookup(HashComputation hashComputation) - { - return hashes.get(hashComputation); - } - - public HashComputationSet withHashComputation(PlanNode node, Optional hashComputation) - { - return pruneSymbols(node.getOutputSymbols()).withHashComputation(hashComputation); - } - - public HashComputationSet withHashComputation(Optional hashComputation) - { - if (hashComputation.isEmpty() || hashes.containsKey(hashComputation.get())) { - return this; - } - return new HashComputationSet(ImmutableSetMultimap.builder() - .putAll(hashes) - .put(hashComputation.get(), hashComputation.get()) - .build()); - } - } - - private static Optional computeHash(Iterable fields) - { - requireNonNull(fields, "fields is null"); - List symbols = ImmutableList.copyOf(fields); - if (symbols.isEmpty()) { - return Optional.empty(); - } - return Optional.of(new HashComputation(fields)); - } - - private static class HashComputation - { - private final List fields; - - private HashComputation(Iterable fields) - { - requireNonNull(fields, "fields is null"); - this.fields = ImmutableList.copyOf(fields); - checkArgument(!this.fields.isEmpty(), "fields cannot be empty"); - } - - public Optional translate(Function> translator) - { - ImmutableList.Builder newSymbols = ImmutableList.builder(); - for (Symbol field : fields) { - Optional newSymbol = translator.apply(field); - if (newSymbol.isEmpty()) { - return Optional.empty(); - } - newSymbols.add(newSymbol.get()); - } - return computeHash(newSymbols.build()); - } - - public boolean canComputeWith(Set availableFields) - { - return availableFields.containsAll(fields); - } - - private Expression getHashExpression(Metadata metadata) - { - Expression hashExpression = new Constant(BIGINT, (long) INITIAL_HASH_VALUE); - for (Symbol field : fields) { - hashExpression = getHashFunctionCall(hashExpression, field, metadata); - } - return hashExpression; - } - - private static Expression getHashFunctionCall(Expression previousHashValue, Symbol symbol, Metadata metadata) - { - Call call = BuiltinFunctionCallBuilder.resolve(metadata) - .setName(HASH_CODE) - .addArgument(symbol.type(), symbol.toSymbolReference()) - .build(); - - return BuiltinFunctionCallBuilder.resolve(metadata) - .setName("combine_hash") - .addArgument(BIGINT, previousHashValue) - .addArgument(BIGINT, orNullHashCode(call)) - .build(); - } - - private static Expression orNullHashCode(Expression expression) - { - return new Coalesce(expression, new Constant(BIGINT, (long) NULL_HASH_CODE)); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - HashComputation that = (HashComputation) o; - return Objects.equals(fields, that.fields); - } - - @Override - public int hashCode() - { - return Objects.hash(fields); - } - - @Override - public String toString() - { - return toStringHelper(this) - .add("fields", fields) - .toString(); - } - } - - private static class PlanWithProperties - { - private final PlanNode node; - private final BiMap hashSymbols; - - public PlanWithProperties(PlanNode node, Map hashSymbols) - { - this.node = requireNonNull(node, "node is null"); - this.hashSymbols = ImmutableBiMap.copyOf(requireNonNull(hashSymbols, "hashSymbols is null")); - } - - public PlanNode getNode() - { - return node; - } - - public BiMap 
getHashSymbols() - { - return hashSymbols; - } - - public Symbol getRequiredHashSymbol(HashComputation hash) - { - Symbol hashSymbol = hashSymbols.get(hash); - requireNonNull(hashSymbol, () -> "No hash symbol generated for " + hash); - return hashSymbol; - } - } - - private static Map computeIdentityTranslations(Map assignments) - { - Map outputToInput = new HashMap<>(); - for (Map.Entry assignment : assignments.entrySet()) { - if (assignment.getValue() instanceof Reference) { - outputToInput.put(assignment.getKey(), Symbol.from(assignment.getValue())); - } - } - return outputToInput; - } -} diff --git a/core/trino-main/src/test/java/io/trino/cost/TestOptimizerConfig.java b/core/trino-main/src/test/java/io/trino/cost/TestOptimizerConfig.java index f9308c50794f..fd4739b4f4d1 100644 --- a/core/trino-main/src/test/java/io/trino/cost/TestOptimizerConfig.java +++ b/core/trino-main/src/test/java/io/trino/cost/TestOptimizerConfig.java @@ -60,7 +60,6 @@ public void testDefaults() .setFilterConjunctionIndependenceFactor(0.75) .setNonEstimatablePredicateApproximationEnabled(true) .setOptimizeMetadataQueries(false) - .setOptimizeHashGeneration(false) .setPushTableWriteThroughUnion(true) .setDictionaryAggregation(false) .setIterativeOptimizerTimeout(new Duration(3, MINUTES)) @@ -122,7 +121,6 @@ public void testExplicitPropertyMappings() .put("distributed-sort", "false") .put("use-preferred-write-partitioning", "false") .put("optimizer.optimize-metadata-queries", "true") - .put("optimizer.optimize-hash-generation", "true") .put("optimizer.push-table-write-through-union", "false") .put("optimizer.dictionary-aggregation", "true") .put("optimizer.push-aggregation-through-outer-join", "false") @@ -177,7 +175,6 @@ public void testExplicitPropertyMappings() .setFilterConjunctionIndependenceFactor(1.0) .setNonEstimatablePredicateApproximationEnabled(false) .setOptimizeMetadataQueries(true) - .setOptimizeHashGeneration(true) .setPushTableWriteThroughUnion(false) .setDictionaryAggregation(true) .setPushAggregationThroughOuterJoin(false) diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/TestLogicalPlanner.java b/core/trino-main/src/test/java/io/trino/sql/planner/TestLogicalPlanner.java index 30e65e36940a..393ecc211cb3 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/TestLogicalPlanner.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/TestLogicalPlanner.java @@ -65,9 +65,7 @@ import io.trino.sql.planner.plan.IndexJoinNode; import io.trino.sql.planner.plan.JoinNode; import io.trino.sql.planner.plan.LimitNode; -import io.trino.sql.planner.plan.MarkDistinctNode; import io.trino.sql.planner.plan.PlanNode; -import io.trino.sql.planner.plan.ProjectNode; import io.trino.sql.planner.plan.SemiJoinNode; import io.trino.sql.planner.plan.SemiJoinNode.DistributionType; import io.trino.sql.planner.plan.SortNode; @@ -101,9 +99,6 @@ import static io.trino.SystemSessionProperties.FILTERING_SEMI_JOIN_TO_INNER; import static io.trino.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE; import static io.trino.SystemSessionProperties.JOIN_REORDERING_STRATEGY; -import static io.trino.SystemSessionProperties.OPTIMIZE_HASH_GENERATION; -import static io.trino.SystemSessionProperties.TASK_CONCURRENCY; -import static io.trino.metadata.TestMetadataManager.createTestMetadataManager; import static io.trino.spi.StandardErrorCode.SUBQUERY_MULTIPLE_ROWS; import static io.trino.spi.connector.SortOrder.ASC_NULLS_LAST; import static io.trino.spi.predicate.Domain.multipleValues; @@ -145,7 +140,6 @@ import static 
io.trino.sql.planner.assertions.PlanMatchPattern.expression;
 import static io.trino.sql.planner.assertions.PlanMatchPattern.filter;
 import static io.trino.sql.planner.assertions.PlanMatchPattern.groupId;
-import static io.trino.sql.planner.assertions.PlanMatchPattern.identityProject;
 import static io.trino.sql.planner.assertions.PlanMatchPattern.join;
 import static io.trino.sql.planner.assertions.PlanMatchPattern.limit;
 import static io.trino.sql.planner.assertions.PlanMatchPattern.markDistinct;
@@ -208,8 +202,6 @@ public class TestLogicalPlanner
     private static final ResolvedFunction FAIL = FUNCTIONS.resolveFunction("fail", fromTypes(INTEGER, VARCHAR));
     private static final ResolvedFunction LOWER = FUNCTIONS.resolveFunction("lower", fromTypes(VARCHAR));
-    private static final ResolvedFunction COMBINE_HASH = FUNCTIONS.resolveFunction("combine_hash", fromTypes(BIGINT, BIGINT));
-    private static final ResolvedFunction HASH_CODE = createTestMetadataManager().resolveOperator(OperatorType.HASH_CODE, ImmutableList.of(BIGINT));
     private static final ResolvedFunction CONCAT = FUNCTIONS.resolveFunction("concat", fromTypes(VARCHAR, VARCHAR));
 
     private static final WindowNode.Frame ROWS_FROM_CURRENT = new WindowNode.Frame(
@@ -1502,7 +1494,6 @@ public void testUsesDistributedJoinIfNaturallyPartitionedOnProbeSymbols()
     {
         Session broadcastJoin = Session.builder(this.getPlanTester().getDefaultSession())
                 .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.BROADCAST.name())
-                .setSystemProperty(OPTIMIZE_HASH_GENERATION, Boolean.toString(false))
                 .build();
 
         // replicated join with naturally partitioned and distributed probe side is rewritten to partitioned join
@@ -1999,73 +1990,6 @@ public void testRedundantDistinctLimitNodeRemoval()
                                 values(ImmutableList.of("x")))));
     }
 
-    @Test
-    public void testRedundantHashRemovalForUnionAll()
-    {
-        assertPlan(
-                "SELECT count(*) FROM ((SELECT nationkey FROM customer) UNION ALL (SELECT nationkey FROM customer)) GROUP BY nationkey",
-                Session.builder(getPlanTester().getDefaultSession())
-                        .setSystemProperty(OPTIMIZE_HASH_GENERATION, "true")
-                        .build(),
-                output(
-                        project(
-                                node(AggregationNode.class,
-                                        exchange(LOCAL, REPARTITION,
-                                                project(ImmutableMap.of("hash", expression(new Call(COMBINE_HASH, ImmutableList.of(new Constant(BIGINT, 0L), new Coalesce(new Call(HASH_CODE, ImmutableList.of(new Reference(BIGINT, "nationkey"))), new Constant(BIGINT, 0L)))))),
-                                                        node(AggregationNode.class,
-                                                                tableScan("customer", ImmutableMap.of("nationkey", "nationkey")))),
-                                                project(ImmutableMap.of("hash_1", expression(new Call(COMBINE_HASH, ImmutableList.of(new Constant(BIGINT, 0L), new Coalesce(new Call(HASH_CODE, ImmutableList.of(new Reference(BIGINT, "nationkey_6"))), new Constant(BIGINT, 0L)))))),
-                                                        node(AggregationNode.class,
-                                                                tableScan("customer", ImmutableMap.of("nationkey_6", "nationkey"))))))))));
-    }
-
-    @Test
-    public void testRedundantHashRemovalForMarkDistinct()
-    {
-        assertDistributedPlan(
-                "select count(*), count(distinct orderkey), count(distinct partkey), count(distinct suppkey) from lineitem",
-                Session.builder(this.getPlanTester().getDefaultSession())
-                        .setSystemProperty(OPTIMIZE_HASH_GENERATION, "true")
-                        .setSystemProperty(TASK_CONCURRENCY, "16")
-                        .setSystemProperty(DISTINCT_AGGREGATIONS_STRATEGY, "mark_distinct")
-                        .build(),
-                output(
-                        anyTree(
-                                identityProject(
-                                        node(MarkDistinctNode.class,
-                                                anyTree(
-                                                        project(ImmutableMap.of(
-                                                                        "hash_1", expression(new Call(COMBINE_HASH, ImmutableList.of(new Constant(BIGINT, 0L), new Coalesce(new Call(HASH_CODE, ImmutableList.of(new Reference(BIGINT, "suppkey"))), new Constant(BIGINT, 0L))))),
-                                                                        "hash_2", expression(new Call(COMBINE_HASH, ImmutableList.of(new Constant(BIGINT, 0L), new Coalesce(new Call(HASH_CODE, ImmutableList.of(new Reference(BIGINT, "partkey"))), new Constant(BIGINT, 0L)))))),
-                                                                node(MarkDistinctNode.class,
-                                                                        tableScan("lineitem", ImmutableMap.of("suppkey", "suppkey", "partkey", "partkey")))))))))));
-    }
-
-    @Test
-    public void testRedundantHashRemovalForUnionAllAndMarkDistinct()
-    {
-        assertDistributedPlan(
-                "SELECT count(distinct(custkey)), count(distinct(nationkey)) FROM ((SELECT custkey, nationkey FROM customer) UNION ALL ( SELECT custkey, custkey FROM customer))",
-                Session.builder(getPlanTester().getDefaultSession())
-                        .setSystemProperty(DISTINCT_AGGREGATIONS_STRATEGY, "mark_distinct")
-                        .setSystemProperty(OPTIMIZE_HASH_GENERATION, "true")
-                        .build(),
-                output(
-                        anyTree(
-                                node(MarkDistinctNode.class,
-                                        anyTree(
-                                                node(MarkDistinctNode.class,
-                                                        exchange(LOCAL, REPARTITION,
-                                                                exchange(REMOTE, REPARTITION,
-                                                                        project(ImmutableMap.of(
-                                                                                        "hash_custkey", expression(new Call(COMBINE_HASH, ImmutableList.of(new Constant(BIGINT, 0L), new Coalesce(new Call(HASH_CODE, ImmutableList.of(new Reference(BIGINT, "custkey"))), new Constant(BIGINT, 0L))))),
-                                                                                        "hash_nationkey", expression(new Call(COMBINE_HASH, ImmutableList.of(new Constant(BIGINT, 0L), new Coalesce(new Call(HASH_CODE, ImmutableList.of(new Reference(BIGINT, "nationkey"))), new Constant(BIGINT, 0L)))))),
-                                                                                tableScan("customer", ImmutableMap.of("custkey", "custkey", "nationkey", "nationkey")))),
-                                                                exchange(REMOTE, REPARTITION,
-                                                                        node(ProjectNode.class,
-                                                                                node(TableScanNode.class)))))))))));
-    }
-
     @Test
     public void testRemoveRedundantFilter()
     {
diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/iterative/rule/TestPreAggregateCaseAggregations.java b/core/trino-main/src/test/java/io/trino/sql/planner/iterative/rule/TestPreAggregateCaseAggregations.java
index 9b825c16c8db..35c04a492348 100644
--- a/core/trino-main/src/test/java/io/trino/sql/planner/iterative/rule/TestPreAggregateCaseAggregations.java
+++ b/core/trino-main/src/test/java/io/trino/sql/planner/iterative/rule/TestPreAggregateCaseAggregations.java
@@ -48,7 +48,6 @@
 import java.util.Optional;
 import java.util.function.Predicate;
 
-import static io.trino.SystemSessionProperties.OPTIMIZE_HASH_GENERATION;
 import static io.trino.SystemSessionProperties.PREFER_PARTIAL_AGGREGATION;
 import static io.trino.SystemSessionProperties.TASK_CONCURRENCY;
 import static io.trino.spi.type.BigintType.BIGINT;
@@ -92,7 +91,6 @@ protected PlanTester createPlanTester()
         Session.SessionBuilder sessionBuilder = testSessionBuilder()
                 .setCatalog("local")
                 .setSchema("default")
-                .setSystemProperty(OPTIMIZE_HASH_GENERATION, "false") // remove hash computing projections for simplicity
                 .setSystemProperty(PREFER_PARTIAL_AGGREGATION, "false") // remove partial aggregations for simplicity
                 .setSystemProperty(TASK_CONCURRENCY, "1"); // these tests don't handle exchanges from local parallel
diff --git a/core/trino-main/src/test/java/io/trino/sql/query/TestPrecomputedHashes.java b/core/trino-main/src/test/java/io/trino/sql/query/TestPrecomputedHashes.java
deleted file mode 100644
index 6ba308a7ff40..000000000000
--- a/core/trino-main/src/test/java/io/trino/sql/query/TestPrecomputedHashes.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.sql.query;
-
-import io.trino.Session;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.TestInstance;
-import org.junit.jupiter.api.parallel.Execution;
-
-import static io.trino.SystemSessionProperties.OPTIMIZE_HASH_GENERATION;
-import static io.trino.testing.TestingSession.testSessionBuilder;
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;
-import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT;
-
-@TestInstance(PER_CLASS)
-@Execution(CONCURRENT)
-public class TestPrecomputedHashes
-{
-    private final QueryAssertions assertions;
-
-    public TestPrecomputedHashes()
-    {
-        Session session = testSessionBuilder()
-                .setSystemProperty(OPTIMIZE_HASH_GENERATION, "true")
-                .build();
-
-        assertions = new QueryAssertions(session);
-    }
-
-    @AfterAll
-    public void teardown()
-    {
-        assertions.close();
-    }
-
-    @Test
-    public void testDistinctLimit()
-    {
-        // see https://github.com/prestodb/presto/issues/11593
-        assertThat(assertions.query(
-                "SELECT a " +
-                        "FROM (" +
-                        "    SELECT a, b" +
-                        "    FROM (VALUES (1, 2)) t(a, b)" +
-                        "    WHERE a = 1" +
-                        "    GROUP BY a, b" +
-                        "    LIMIT 1" +
-                        ")" +
-                        "GROUP BY a"))
-                .matches("VALUES (1)");
-    }
-
-    @Test
-    public void testUnionAllAndDistinctLimit()
-    {
-        assertThat(assertions.query(
-                "WITH t(a, b) AS (VALUES (1, 10))" +
-                        "SELECT" +
-                        "  count(DISTINCT if(type='A', a))," +
-                        "  count(DISTINCT if(type='A', b))" +
-                        "FROM (" +
-                        "  SELECT a, b, 'A' AS type" +
-                        "  FROM t" +
-                        "  GROUP BY a, b" +
-                        "  UNION ALL" +
-                        "  SELECT a, b, 'B' AS type" +
-                        "  FROM t" +
-                        "  GROUP BY a, b)"))
-                .matches("VALUES (BIGINT '1', BIGINT '1')");
-    }
-}
diff --git a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/BaseJdbcConnectorTest.java b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/BaseJdbcConnectorTest.java
index 3640db8c59fa..c78106b6b66c 100644
--- a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/BaseJdbcConnectorTest.java
+++ b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/BaseJdbcConnectorTest.java
@@ -668,21 +668,16 @@ public void testCountDistinctWithStringTypes()
         try (TestTable testTable = newTrinoTable("distinct_strings", "(t_char CHAR(5), t_varchar VARCHAR(5))", rows)) {
             if (!(hasBehavior(SUPPORTS_AGGREGATION_PUSHDOWN) && hasBehavior(SUPPORTS_PREDICATE_PUSHDOWN_WITH_VARCHAR_INEQUALITY))) {
-                // disabling hash generation to prevent extra projections in the plan which make it hard to write matchers for isNotFullyPushedDown
-                Session optimizeHashGenerationDisabled = Session.builder(getSession())
-                        .setSystemProperty("optimize_hash_generation", "false")
-                        .build();
-
                 // It is not captured in the `isNotFullyPushedDown` calls (can't do that) but depending on the connector in use some aggregations
                 // still can be pushed down to connector.
                 // If `SUPPORTS_AGGREGATION_PUSHDOWN == false` but `SUPPORTS_PREDICATE_PUSHDOWN_WITH_VARCHAR_INEQUALITY == true` the DISTINCT part of aggregation
                 // will still be pushed down to connector as `GROUP BY`. Only the `count` part will remain on the Trino side.
                 // If `SUPPORTS_PREDICATE_PUSHDOWN_WITH_VARCHAR_INEQUALITY == false` both parts of aggregation will be executed on Trino side.
-                assertThat(query(optimizeHashGenerationDisabled, "SELECT count(DISTINCT t_varchar) FROM " + testTable.getName()))
+                assertThat(query("SELECT count(DISTINCT t_varchar) FROM " + testTable.getName()))
                         .matches("VALUES BIGINT '7'")
                         .isNotFullyPushedDown(AggregationNode.class);
 
-                assertThat(query(optimizeHashGenerationDisabled, "SELECT count(DISTINCT t_char) FROM " + testTable.getName()))
+                assertThat(query("SELECT count(DISTINCT t_char) FROM " + testTable.getName()))
                         .matches("VALUES BIGINT '7'")
                         .isNotFullyPushedDown(AggregationNode.class);
@@ -1258,7 +1253,6 @@ public void testJoinPushdownDisabled()
                 // Disable dynamic filtering so that expected plans in case of no pushdown remain "simple"
                 .setSystemProperty("enable_dynamic_filtering", "false")
                 // Disable optimized hash generation so that expected plans in case of no pushdown remain "simple"
-                .setSystemProperty("optimize_hash_generation", "false")
                 .build();
 
         assertThat(query(noJoinPushdown, "SELECT r.name, n.name FROM nation n JOIN region r ON n.regionkey = r.regionkey"))
diff --git a/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlLegacyConnectorTest.java b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlLegacyConnectorTest.java
index fbfc2e462cd7..2f37c7b6a84f 100644
--- a/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlLegacyConnectorTest.java
+++ b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlLegacyConnectorTest.java
@@ -110,18 +110,13 @@ public void testCountDistinctWithStringTypes()
                 .collect(toImmutableList());
 
         try (TestTable testTable = new TestTable(onRemoteDatabase(), "tpch.distinct_strings", "(t_char CHAR(5) CHARACTER SET utf8mb4, t_varchar VARCHAR(5) CHARACTER SET utf8mb4)", rows)) {
-            // disabling hash generation to prevent extra projections in the plan which make it hard to write matchers for isNotFullyPushedDown
-            Session optimizeHashGenerationDisabled = Session.builder(getSession())
-                    .setSystemProperty("optimize_hash_generation", "false")
-                    .build();
-
             // It is not captured in the `isNotFullyPushedDown` calls (can't do that) but depending on the connector in use some aggregations
            // still can be pushed down to connector.
             // the DISTINCT part of aggregation will still be pushed down to connector as `GROUP BY`. Only the `count` part will remain on the Trino side.
-            assertThat(query(optimizeHashGenerationDisabled, "SELECT count(DISTINCT t_varchar) FROM " + testTable.getName()))
+            assertThat(query("SELECT count(DISTINCT t_varchar) FROM " + testTable.getName()))
                    .matches("VALUES BIGINT '7'")
                    .isNotFullyPushedDown(AggregationNode.class);
 
-            assertThat(query(optimizeHashGenerationDisabled, "SELECT count(DISTINCT t_char) FROM " + testTable.getName()))
+            assertThat(query("SELECT count(DISTINCT t_char) FROM " + testTable.getName()))
                    .matches("VALUES BIGINT '7'")
                    .isNotFullyPushedDown(AggregationNode.class);
diff --git a/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestEngineOnlyQueries.java b/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestEngineOnlyQueries.java
index 0082632aad17..ed5461d58d14 100644
--- a/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestEngineOnlyQueries.java
+++ b/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestEngineOnlyQueries.java
@@ -1460,8 +1460,6 @@ public void testDescribeOutputNonSelect()
         assertDescribeOutputRowCount("CREATE TABLE foo AS SELECT * FROM nation");
 
         assertDescribeOutputEmpty("CALL foo()");
-        assertDescribeOutputEmpty("SET SESSION optimize_hash_generation=false");
-        assertDescribeOutputEmpty("RESET SESSION optimize_hash_generation");
         assertDescribeOutputEmpty("START TRANSACTION");
         assertDescribeOutputEmpty("COMMIT");
         assertDescribeOutputEmpty("ROLLBACK");
diff --git a/testing/trino-tests/src/test/java/io/trino/tests/TestDistributedQueriesNoHashGeneration.java b/testing/trino-tests/src/test/java/io/trino/tests/TestDistributedQueriesNoHashGeneration.java
deleted file mode 100644
index a1b78ca2d6ac..000000000000
--- a/testing/trino-tests/src/test/java/io/trino/tests/TestDistributedQueriesNoHashGeneration.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.tests;
-
-import com.google.common.collect.ImmutableMap;
-import io.trino.testing.AbstractTestQueries;
-import io.trino.testing.QueryRunner;
-import io.trino.tests.tpch.TpchQueryRunner;
-
-public class TestDistributedQueriesNoHashGeneration
-        extends AbstractTestQueries
-{
-    @Override
-    protected QueryRunner createQueryRunner()
-            throws Exception
-    {
-        return TpchQueryRunner.builder()
-                .setCoordinatorProperties(ImmutableMap.of("optimizer.optimize-hash-generation", "false"))
-                .build();
-    }
-}