Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ public final class SystemSessionProperties
public static final String QUERY_CLIENT_TIMEOUT = "query_client_timeout";
public static final String REWRITE_MIN_MAX_BY_TO_TOP_N = "rewrite_min_max_by_to_top_n";
// Boolean property: add a distinct aggregation below the semi join build side.
public static final String ADD_DISTINCT_BELOW_SEMI_JOIN_BUILD = "add_distinct_below_semi_join_build";
// Boolean property: rewrite a semi join to an inner join when both sides are bucketed by the join key.
public static final String REWRITE_BUCKETED_SEMI_JOIN_TO_INNER_JOIN = "rewrite_bucketed_semi_join_to_inner_join";
// Boolean property: merge max_by/min_by aggregations sharing a comparison key into one aggregation with a ROW argument.
public static final String MERGE_MAX_BY_AND_MIN_BY_AGGREGATIONS = "merge_max_by_and_min_by_aggregations";
public static final String UTILIZE_UNIQUE_PROPERTY_IN_QUERY_PLANNING = "utilize_unique_property_in_query_planning";
public static final String PUSHDOWN_SUBFIELDS_FOR_MAP_FUNCTIONS = "pushdown_subfields_for_map_functions";
Expand Down Expand Up @@ -2205,6 +2206,10 @@ public SystemSessionProperties(
"Add distinct aggregation below semi join build",
featuresConfig.isAddDistinctBelowSemiJoinBuild(),
false),
booleanProperty(REWRITE_BUCKETED_SEMI_JOIN_TO_INNER_JOIN,
"Rewrite semi join to inner join when both sides are bucketed by the join key",
featuresConfig.isRewriteBucketedSemiJoinToInnerJoin(),
false),
booleanProperty(MERGE_MAX_BY_AND_MIN_BY_AGGREGATIONS,
"Merge multiple max_by or min_by aggregations with the same comparison key into a single aggregation with ROW argument",
featuresConfig.isMergeMaxByMinByAggregationsEnabled(),
Expand Down Expand Up @@ -3718,6 +3723,11 @@ public static boolean isAddDistinctBelowSemiJoinBuildEnabled(Session session)
return session.getSystemProperty(ADD_DISTINCT_BELOW_SEMI_JOIN_BUILD, Boolean.class);
}

/**
 * Reads the {@code rewrite_bucketed_semi_join_to_inner_join} system property from the session.
 *
 * @param session session whose system properties are consulted
 * @return the boolean value of the property for this session
 */
public static boolean isRewriteBucketedSemiJoinToInnerJoinEnabled(Session session)
{
    Boolean rewriteEnabled = session.getSystemProperty(REWRITE_BUCKETED_SEMI_JOIN_TO_INNER_JOIN, Boolean.class);
    return rewriteEnabled;
}

public static boolean isMergeMaxByMinByAggregationsEnabled(Session session)
{
return session.getSystemProperty(MERGE_MAX_BY_AND_MIN_BY_AGGREGATIONS, Boolean.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ public class FeaturesConfig
private String expressionOptimizerName = DEFAULT_EXPRESSION_OPTIMIZER_NAME;
private boolean addExchangeBelowPartialAggregationOverGroupId;
private boolean addDistinctBelowSemiJoinBuild;
// Backs the "optimizer.rewrite-bucketed-semi-join-to-inner-join" config; no initializer, so it starts out false.
private boolean rewriteBucketedSemiJoinToInnerJoin;
private boolean mergeMaxByMinByAggregationsEnabled;
private boolean pushdownSubfieldForMapFunctions = true;
private boolean pushdownSubfieldForCardinality;
Expand Down Expand Up @@ -3417,6 +3418,19 @@ public boolean isAddDistinctBelowSemiJoinBuild()
return addDistinctBelowSemiJoinBuild;
}

/**
 * Stores the value of the {@code optimizer.rewrite-bucketed-semi-join-to-inner-join} config.
 *
 * @param rewriteBucketedSemiJoinToInnerJoin new value of the flag
 * @return this config instance, so setters can be chained
 */
@Config("optimizer.rewrite-bucketed-semi-join-to-inner-join")
@ConfigDescription("Rewrite semi join to inner join when both sides are bucketed by the join key")
public FeaturesConfig setRewriteBucketedSemiJoinToInnerJoin(boolean rewriteBucketedSemiJoinToInnerJoin)
{
    this.rewriteBucketedSemiJoinToInnerJoin = rewriteBucketedSemiJoinToInnerJoin;

    // Fluent setter: hand back the same instance.
    return this;
}

/**
 * @return the current value of the bucketed semi-join to inner-join rewrite flag
 */
public boolean isRewriteBucketedSemiJoinToInnerJoin()
{
    return this.rewriteBucketedSemiJoinToInnerJoin;
}

@Config("optimizer.merge-max-by-and-min-by-aggregations")
@ConfigDescription("Merge multiple max_by or min_by aggregations with the same comparison key into a single aggregation with ROW argument")
public FeaturesConfig setMergeMaxByMinByAggregationsEnabled(boolean mergeMaxByMinByAggregationsEnabled)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@
import com.facebook.presto.sql.planner.iterative.rule.ReorderJoins;
import com.facebook.presto.sql.planner.iterative.rule.ReplaceConditionalApproxDistinct;
import com.facebook.presto.sql.planner.iterative.rule.RewriteAggregationIfToFilter;
import com.facebook.presto.sql.planner.iterative.rule.RewriteBucketedSemiJoinToInnerJoin;
import com.facebook.presto.sql.planner.iterative.rule.RewriteCaseExpressionPredicate;
import com.facebook.presto.sql.planner.iterative.rule.RewriteCaseToMap;
import com.facebook.presto.sql.planner.iterative.rule.RewriteConstantArrayContainsToInExpression;
Expand Down Expand Up @@ -666,6 +667,12 @@ public PlanOptimizers(
estimatedExchangesCostCalculator,
ImmutableSet.of(
new LeftJoinWithArrayContainsToEquiJoinCondition(metadata.getFunctionAndTypeManager()))),
new IterativeOptimizer(
metadata,
ruleStats,
statsCalculator,
estimatedExchangesCostCalculator,
ImmutableSet.of(new RewriteBucketedSemiJoinToInnerJoin(metadata))),
new IterativeOptimizer(
metadata,
ruleStats,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.sql.planner.iterative.rule;

import com.facebook.presto.Session;
import com.facebook.presto.matching.Captures;
import com.facebook.presto.matching.Pattern;
import com.facebook.presto.metadata.Metadata;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.plan.AggregationNode;
import com.facebook.presto.spi.plan.Assignments;
import com.facebook.presto.spi.plan.EquiJoinClause;
import com.facebook.presto.spi.plan.FilterNode;
import com.facebook.presto.spi.plan.JoinNode;
import com.facebook.presto.spi.plan.JoinType;
import com.facebook.presto.spi.plan.PlanNode;
import com.facebook.presto.spi.plan.ProjectNode;
import com.facebook.presto.spi.plan.SemiJoinNode;
import com.facebook.presto.spi.plan.TableScanNode;
import com.facebook.presto.spi.relation.RowExpression;
import com.facebook.presto.spi.relation.VariableReferenceExpression;
import com.facebook.presto.sql.planner.iterative.Rule;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;

import java.util.List;
import java.util.Map;
import java.util.Optional;

import static com.facebook.presto.SystemSessionProperties.isRewriteBucketedSemiJoinToInnerJoinEnabled;
import static com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT;
import static com.facebook.presto.spi.plan.AggregationNode.Step.SINGLE;
import static com.facebook.presto.spi.plan.AggregationNode.isDistinct;
import static com.facebook.presto.spi.plan.AggregationNode.singleGroupingSet;
import static com.facebook.presto.sql.planner.plan.AssignmentUtils.identityAssignments;
import static com.facebook.presto.sql.planner.plan.Patterns.semiJoin;
import static java.util.Objects.requireNonNull;

/**
 * When both sides of a semi-join are backed by tables bucketed on the semi-join key,
 * rewrite the SemiJoinNode to an INNER JOIN with a DISTINCT on the right side.
 * The intent is to let the join run colocated, without a data shuffle, because both
 * sides are already partitioned by the join key.
 * <pre>
 * - SemiJoin (l.key IN r.key) → semiJoinOutput
 *     - scan l (bucketed by key)
 *     - scan r (bucketed by key)
 * </pre>
 * into:
 * <pre>
 * - Project (semiJoinOutput := TRUE, identity(l.*))
 *     - InnerJoin (l.key = r.key), output = l.*
 *         - scan l
 *         - Aggregate (DISTINCT r.key)
 *             - scan r
 * </pre>
 * Each side may be a table scan wrapped in any number of filters and projections that pass
 * the join key through as a plain variable reference.
 * <p>
 * NOTE(review): the rewritten plan keeps only source rows that have a match and hard-codes
 * {@code semiJoinOutput} to TRUE. That appears to be equivalent to the original semi join only
 * when the consumer retains just the rows where {@code semiJoinOutput} is true; the pattern
 * below matches every semi join without inspecting its consumer — confirm that projections
 * of the IN result and NOT IN predicates cannot reach this rule.
 * <p>
 * NOTE(review): only the {@code bucketed_by} table property is inspected; bucket counts and
 * connector compatibility of the two tables are not compared here — confirm that is handled
 * elsewhere.
 */
public class RewriteBucketedSemiJoinToInnerJoin
        implements Rule<SemiJoinNode>
{
    // Table property holding the list of bucketing column names.
    private static final String BUCKETED_BY_PROPERTY = "bucketed_by";

    private final Metadata metadata;

    public RewriteBucketedSemiJoinToInnerJoin(Metadata metadata)
    {
        this.metadata = requireNonNull(metadata, "metadata is null");
    }

    @Override
    public Pattern<SemiJoinNode> getPattern()
    {
        return semiJoin();
    }

    @Override
    public boolean isEnabled(Session session)
    {
        return isRewriteBucketedSemiJoinToInnerJoinEnabled(session);
    }

    /**
     * Replaces the semi join with {@code Project(InnerJoin(source, distinct(filteringSource)))}
     * when both join keys trace back to table-scan columns that are the sole bucketing column
     * of their table. Returns {@link Result#empty()} otherwise.
     */
    @Override
    public Result apply(SemiJoinNode node, Captures captures, Context context)
    {
        VariableReferenceExpression sourceJoinVariable = node.getSourceJoinVariable();
        VariableReferenceExpression filteringSourceJoinVariable = node.getFilteringSourceJoinVariable();

        PlanNode resolvedSource = context.getLookup().resolve(node.getSource());
        Optional<TableScanInfo> sourceInfo = findTableScanAndResolveVariable(resolvedSource, sourceJoinVariable, context);
        if (!sourceInfo.isPresent()) {
            return Result.empty();
        }

        PlanNode resolvedFilteringSource = context.getLookup().resolve(node.getFilteringSource());
        Optional<TableScanInfo> filteringInfo = findTableScanAndResolveVariable(resolvedFilteringSource, filteringSourceJoinVariable, context);
        if (!filteringInfo.isPresent()) {
            return Result.empty();
        }

        // Metadata lookups happen only after both sides are known to be simple scans.
        Session session = context.getSession();
        if (!isBucketedByColumn(sourceInfo.get(), session) || !isBucketedByColumn(filteringInfo.get(), session)) {
            return Result.empty();
        }

        PlanNode distinctFilteringSource = distinctOnJoinKey(node, resolvedFilteringSource, context);

        // Only the source-side columns are emitted; the filtering side is used purely for matching.
        JoinNode innerJoin = new JoinNode(
                node.getSourceLocation(),
                context.getIdAllocator().getNextId(),
                JoinType.INNER,
                node.getSource(),
                distinctFilteringSource,
                ImmutableList.of(new EquiJoinClause(sourceJoinVariable, filteringSourceJoinVariable)),
                ImmutableList.copyOf(node.getSource().getOutputVariables()),
                Optional.empty(),
                Optional.empty(),
                Optional.empty(),
                Optional.empty(),
                ImmutableMap.of());

        // Every row surviving the inner join had a match, so the semi-join output is constant TRUE.
        ProjectNode projectNode = new ProjectNode(
                context.getIdAllocator().getNextId(),
                innerJoin,
                Assignments.builder()
                        .putAll(identityAssignments(innerJoin.getOutputVariables()))
                        .put(node.getSemiJoinOutput(), TRUE_CONSTANT)
                        .build());

        return Result.ofPlanNode(projectNode);
    }

    /**
     * Returns the filtering side unchanged when it is already distinct on the join key,
     * otherwise wraps it in a grouping-only aggregation on that key. The aggregation is
     * built on the semi join's original child reference, matching how the join's left
     * side is wired, rather than on the resolved node.
     */
    private PlanNode distinctOnJoinKey(SemiJoinNode node, PlanNode resolvedFilteringSource, Context context)
    {
        VariableReferenceExpression joinKey = node.getFilteringSourceJoinVariable();
        if (isOutputDistinct(resolvedFilteringSource, joinKey, context)) {
            return node.getFilteringSource();
        }
        return new AggregationNode(
                node.getSourceLocation(),
                context.getIdAllocator().getNextId(),
                node.getFilteringSource(),
                ImmutableMap.of(),
                singleGroupingSet(ImmutableList.of(joinKey)),
                ImmutableList.of(),
                SINGLE,
                Optional.empty(),
                Optional.empty(),
                Optional.empty());
    }

    /**
     * Walks down through filters and pass-through projections to a table scan and maps
     * {@code variable} to the scan column it originates from. Returns empty when any other
     * node type is met, or when a projection computes the variable from something other
     * than a plain variable reference.
     */
    private Optional<TableScanInfo> findTableScanAndResolveVariable(PlanNode node, VariableReferenceExpression variable, Context context)
    {
        if (node instanceof TableScanNode) {
            TableScanNode tableScan = (TableScanNode) node;
            ColumnHandle columnHandle = tableScan.getAssignments().get(variable);
            if (columnHandle == null) {
                return Optional.empty();
            }
            return Optional.of(new TableScanInfo(tableScan, columnHandle));
        }
        if (node instanceof ProjectNode) {
            ProjectNode projectNode = (ProjectNode) node;
            RowExpression inputExpression = projectNode.getAssignments().get(variable);
            if (!(inputExpression instanceof VariableReferenceExpression)) {
                return Optional.empty();
            }
            return findTableScanAndResolveVariable(
                    context.getLookup().resolve(projectNode.getSource()),
                    (VariableReferenceExpression) inputExpression,
                    context);
        }
        if (node instanceof FilterNode) {
            return findTableScanAndResolveVariable(
                    context.getLookup().resolve(((FilterNode) node).getSource()),
                    variable,
                    context);
        }
        return Optional.empty();
    }

    /**
     * Returns true only when the table's {@code bucketed_by} property lists exactly one
     * column and that column is the join key. A table bucketed on the key plus further
     * columns is rejected: rows sharing the join key can then land in different buckets,
     * so the table is not partitioned on the join key alone.
     */
    private boolean isBucketedByColumn(TableScanInfo info, Session session)
    {
        Map<String, Object> properties = metadata.getTableMetadata(session, info.tableScan.getTable()).getMetadata().getProperties();
        Object bucketedByValue = properties.get(BUCKETED_BY_PROPERTY);
        if (!(bucketedByValue instanceof List)) {
            return false;
        }
        List<?> bucketColumns = (List<?>) bucketedByValue;
        if (bucketColumns.size() != 1) {
            return false;
        }
        String columnName = metadata.getColumnMetadata(session, info.tableScan.getTable(), info.columnHandle).getName();
        return columnName.equals(bucketColumns.get(0));
    }

    /**
     * Returns true when {@code output} is produced by a grouping-only aggregation whose single
     * grouping key is that variable, looking through filters and pass-through projections.
     */
    private boolean isOutputDistinct(PlanNode node, VariableReferenceExpression output, Context context)
    {
        if (node instanceof AggregationNode) {
            AggregationNode aggregationNode = (AggregationNode) node;
            return isDistinct(aggregationNode) &&
                    aggregationNode.getGroupingKeys().size() == 1 &&
                    aggregationNode.getGroupingKeys().contains(output);
        }
        if (node instanceof ProjectNode) {
            ProjectNode projectNode = (ProjectNode) node;
            RowExpression inputExpression = projectNode.getAssignments().get(output);
            if (!(inputExpression instanceof VariableReferenceExpression)) {
                return false;
            }
            return isOutputDistinct(
                    context.getLookup().resolve(projectNode.getSource()),
                    (VariableReferenceExpression) inputExpression,
                    context);
        }
        if (node instanceof FilterNode) {
            return isOutputDistinct(
                    context.getLookup().resolve(((FilterNode) node).getSource()),
                    output,
                    context);
        }
        return false;
    }

    /**
     * Pairs a table scan with the column handle that a join variable resolves to.
     */
    private static final class TableScanInfo
    {
        private final TableScanNode tableScan;
        private final ColumnHandle columnHandle;

        TableScanInfo(TableScanNode tableScan, ColumnHandle columnHandle)
        {
            this.tableScan = requireNonNull(tableScan, "tableScan is null");
            this.columnHandle = requireNonNull(columnHandle, "columnHandle is null");
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ public void testDefaults()
.setExcludeInvalidWorkerSessionProperties(false)
.setAddExchangeBelowPartialAggregationOverGroupId(false)
.setAddDistinctBelowSemiJoinBuild(false)
.setRewriteBucketedSemiJoinToInnerJoin(false)
.setMergeMaxByMinByAggregationsEnabled(false)
.setTryFunctionCatchableErrors("")
.setPushdownSubfieldForMapFunctions(true)
Expand Down Expand Up @@ -532,6 +533,7 @@ public void testExplicitPropertyMappings()
.put("expression-optimizer-name", "custom")
.put("exclude-invalid-worker-session-properties", "true")
.put("optimizer.add-distinct-below-semi-join-build", "true")
.put("optimizer.rewrite-bucketed-semi-join-to-inner-join", "true")
.put("optimizer.merge-max-by-and-min-by-aggregations", "true")
.put("try-function-catchable-errors", "GENERIC_INTERNAL_ERROR,INVALID_ARGUMENTS")
.put("optimizer.pushdown-subfield-for-map-functions", "false")
Expand Down Expand Up @@ -771,6 +773,7 @@ public void testExplicitPropertyMappings()
.setExcludeInvalidWorkerSessionProperties(true)
.setAddExchangeBelowPartialAggregationOverGroupId(true)
.setAddDistinctBelowSemiJoinBuild(true)
.setRewriteBucketedSemiJoinToInnerJoin(true)
.setMergeMaxByMinByAggregationsEnabled(true)
.setTryFunctionCatchableErrors("GENERIC_INTERNAL_ERROR,INVALID_ARGUMENTS")
.setPushdownSubfieldForMapFunctions(false)
Expand Down
Loading
Loading