From 6df7aba3b84cd6a6ff54f5a10336b242fee3584d Mon Sep 17 00:00:00 2001 From: Zhou Kunqin <25057648+time-and-fate@users.noreply.github.com> Date: Sat, 10 Aug 2024 00:20:11 +0800 Subject: [PATCH] planner: support generating `leading` and `hash_join_build` hint from physical plan (#55195) close pingcap/tidb#55280 --- pkg/bindinfo/capture_test.go | 2 +- .../clustertablestest/cluster_tables_test.go | 98 +++-- .../cbotest/testdata/analyze_suite_out.json | 2 +- .../casetest/dag/testdata/plan_suite_out.json | 8 +- .../testdata/plan_suite_out.json | 36 +- pkg/planner/core/hint_utils.go | 374 ++++++++++++++++-- .../r/planner/core/physical_plan.result | 4 +- 7 files changed, 442 insertions(+), 82 deletions(-) diff --git a/pkg/bindinfo/capture_test.go b/pkg/bindinfo/capture_test.go index d9ea79524a01d..dfda50c3d12ed 100644 --- a/pkg/bindinfo/capture_test.go +++ b/pkg/bindinfo/capture_test.go @@ -418,7 +418,7 @@ func TestUpdateSubqueryCapture(t *testing.T) { tk.MustExec("admin capture bindings") rows := tk.MustQuery("show global bindings").Rows() require.Len(t, rows, 1) - bindSQL := "UPDATE /*+ hash_join(@`upd_1` `test`.`t1`), use_index(@`upd_1` `test`.`t1` `idx_b`), no_order_index(@`upd_1` `test`.`t1` `idx_b`), use_index(@`sel_1` `test`.`t2` ), use_index(@`sel_2` `test`.`t2` )*/ `test`.`t1` SET `b`=1 WHERE `b` = 2 AND (`a` IN (SELECT `a` FROM `test`.`t2` WHERE `b` = 1) OR `c` IN (SELECT `a` FROM `test`.`t2` WHERE `b` = 1))" + bindSQL := "UPDATE /*+ hash_join(`test`.`t2`@`sel_2`), hash_join(`test`.`t1`), use_index(@`upd_1` `test`.`t1` `idx_b`), no_order_index(@`upd_1` `test`.`t1` `idx_b`), use_index(@`sel_1` `test`.`t2` ), use_index(@`sel_2` `test`.`t2` )*/ `test`.`t1` SET `b`=1 WHERE `b` = 2 AND (`a` IN (SELECT `a` FROM `test`.`t2` WHERE `b` = 1) OR `c` IN (SELECT `a` FROM `test`.`t2` WHERE `b` = 1))" originSQL := "UPDATE `test`.`t1` SET `b`=1 WHERE `b` = 2 AND (`a` IN (SELECT `a` FROM `test`.`t2` WHERE `b` = 1) OR `c` IN (SELECT `a` FROM `test`.`t2` WHERE `b` = 1))" 
require.Equal(t, bindSQL, rows[0][1]) tk.MustExec(originSQL) diff --git a/pkg/infoschema/test/clustertablestest/cluster_tables_test.go b/pkg/infoschema/test/clustertablestest/cluster_tables_test.go index e69c585554fe9..4070b31429020 100644 --- a/pkg/infoschema/test/clustertablestest/cluster_tables_test.go +++ b/pkg/infoschema/test/clustertablestest/cluster_tables_test.go @@ -944,6 +944,8 @@ func TestQuickBinding(t *testing.T) { tk.MustExec("use test") tk.MustExec(`create table t1 (pk int, a int, b int, c int, primary key(pk), key k_a(a), key k_bc(b, c))`) tk.MustExec(`create table t2 (a int, b int, c int, key k_a(a), key k_bc(b, c))`) // no primary key + tk.MustExec(`create table t3 (a int, b int, c int, key k_a(a), key k_bc(b, c))`) + tk.MustExec(`create table t4 (a int, b int, c int, key k_a(a), key k_bc(b, c))`) type testCase struct { template string @@ -971,40 +973,42 @@ func TestQuickBinding(t *testing.T) { {`select a+b+? from (select /*+ stream_agg() */ count(*) as a from t1) tt1, (select /*+ hash_agg() */ count(*) as b from t1) tt2`, "stream_agg(@`sel_2`), use_index(@`sel_2` `test`.`t1` `k_a`), no_order_index(@`sel_2` `test`.`t1` `k_a`), agg_to_cop(@`sel_2`), hash_agg(@`sel_3`), use_index(@`sel_3` `test`.`t1` `k_a`), no_order_index(@`sel_3` `test`.`t1` `k_a`), agg_to_cop(@`sel_3`)", nil}, // 2-way hash joins - {`select /*+ hash_join(t1, t2), use_index(t1), use_index(t2) */ t1.* from t1, t2 where t1.a=t2.a and t1.aIndexLookUp(Index(t.c_d_e)[[NULL,NULL]], Table(t))}(test.t.c,test.t.c)->Delete", - "Hints": "inl_join(@`del_1` `test`.`t2`), use_index(@`del_1` `test`.`t1` ), no_order_index(@`del_1` `test`.`t1` `primary`), use_index(@`del_1` `test`.`t2` `c_d_e`), no_order_index(@`del_1` `test`.`t2` `c_d_e`)" + "Hints": "inl_join(`test`.`t2`), use_index(@`del_1` `test`.`t1` ), no_order_index(@`del_1` `test`.`t1` `primary`), use_index(@`del_1` `test`.`t2` `c_d_e`), no_order_index(@`del_1` `test`.`t2` `c_d_e`)" }, { "SQL": "delete /*+ TIDB_SMJ(t1, t2) */ from t1 using 
t t1, t t2 where t1.c=t2.c", "Best": "MergeInnerJoin{IndexLookUp(Index(t.c_d_e)[[NULL,+inf]], Table(t))->IndexLookUp(Index(t.c_d_e)[[NULL,+inf]], Table(t))}(test.t.c,test.t.c)->Delete", - "Hints": "merge_join(@`del_1` `test`.`t1`), use_index(@`del_1` `test`.`t1` `c_d_e`), order_index(@`del_1` `test`.`t1` `c_d_e`), use_index(@`del_1` `test`.`t2` `c_d_e`), order_index(@`del_1` `test`.`t2` `c_d_e`)" + "Hints": "merge_join(`test`.`t1`), use_index(@`del_1` `test`.`t1` `c_d_e`), order_index(@`del_1` `test`.`t1` `c_d_e`), use_index(@`del_1` `test`.`t2` `c_d_e`), order_index(@`del_1` `test`.`t2` `c_d_e`)" }, { "SQL": "update /*+ TIDB_SMJ(t1, t2) */ t t1, t t2 set t1.c=1, t2.c=1 where t1.a=t2.a", "Best": "MergeInnerJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.a)->Update", - "Hints": "merge_join(@`upd_1` `test`.`t1`), use_index(@`upd_1` `test`.`t1` ), order_index(@`upd_1` `test`.`t1` `primary`), use_index(@`upd_1` `test`.`t2` ), order_index(@`upd_1` `test`.`t2` `primary`)" + "Hints": "merge_join(`test`.`t1`), use_index(@`upd_1` `test`.`t1` ), order_index(@`upd_1` `test`.`t1` `primary`), use_index(@`upd_1` `test`.`t2` ), order_index(@`upd_1` `test`.`t2` `primary`)" }, { "SQL": "update /*+ TIDB_HJ(t1, t2) */ t t1, t t2 set t1.c=1, t2.c=1 where t1.a=t2.a", "Best": "LeftHashJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.a)->Update", - "Hints": "hash_join(@`upd_1` `test`.`t1`), use_index(@`upd_1` `test`.`t1` ), no_order_index(@`upd_1` `test`.`t1` `primary`), use_index(@`upd_1` `test`.`t2` ), no_order_index(@`upd_1` `test`.`t2` `primary`)" + "Hints": "hash_join_build(`test`.`t2`), use_index(@`upd_1` `test`.`t1` ), no_order_index(@`upd_1` `test`.`t1` `primary`), use_index(@`upd_1` `test`.`t2` ), no_order_index(@`upd_1` `test`.`t2` `primary`)" }, { "SQL": "delete from t where b < 1 order by d limit 1", diff --git a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json 
b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json index 50ed5078eb428..b440550618083 100644 --- a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json +++ b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json @@ -1883,19 +1883,19 @@ "SQL": "select /*+ USE_INDEX(t1, c_d_e), USE_INDEX(t2, f) */ * from t t1, t t2 where t1.a = t2.b", "Best": "LeftHashJoin{IndexLookUp(Index(t.c_d_e)[[NULL,+inf]], Table(t))->IndexLookUp(Index(t.f)[[NULL,+inf]], Table(t))}(test.t.a,test.t.b)", "HasWarn": false, - "Hints": "hash_join(@`sel_1` `test`.`t1`), use_index(@`sel_1` `test`.`t1` `c_d_e`), no_order_index(@`sel_1` `test`.`t1` `c_d_e`), use_index(@`sel_1` `test`.`t2` `f`), no_order_index(@`sel_1` `test`.`t2` `f`)" + "Hints": "hash_join_build(`test`.`t2`), use_index(@`sel_1` `test`.`t1` `c_d_e`), no_order_index(@`sel_1` `test`.`t1` `c_d_e`), use_index(@`sel_1` `test`.`t2` `f`), no_order_index(@`sel_1` `test`.`t2` `f`)" }, { "SQL": "select /*+ IGNORE_INDEX(t1, c_d_e), IGNORE_INDEX(t2, f), HASH_JOIN(t1) */ * from t t1, t t2 where t1.a = t2.b", "Best": "LeftHashJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.b)", "HasWarn": false, - "Hints": "hash_join(@`sel_1` `test`.`t1`), use_index(@`sel_1` `test`.`t1` ), no_order_index(@`sel_1` `test`.`t1` `primary`), use_index(@`sel_1` `test`.`t2` ), no_order_index(@`sel_1` `test`.`t2` `primary`)" + "Hints": "hash_join_build(`test`.`t2`), use_index(@`sel_1` `test`.`t1` ), no_order_index(@`sel_1` `test`.`t1` `primary`), use_index(@`sel_1` `test`.`t2` ), no_order_index(@`sel_1` `test`.`t2` `primary`)" }, { "SQL": "select /*+ FORCE_INDEX(t1, c_d_e), FORCE_INDEX(t2, f) */ * from t t1, t t2 where t1.a = t2.b", "Best": "LeftHashJoin{IndexLookUp(Index(t.c_d_e)[[NULL,+inf]], Table(t))->IndexLookUp(Index(t.f)[[NULL,+inf]], Table(t))}(test.t.a,test.t.b)", "HasWarn": false, - "Hints": "hash_join(@`sel_1` `test`.`t1`), use_index(@`sel_1` `test`.`t1` `c_d_e`), 
no_order_index(@`sel_1` `test`.`t1` `c_d_e`), use_index(@`sel_1` `test`.`t2` `f`), no_order_index(@`sel_1` `test`.`t2` `f`)" + "Hints": "hash_join_build(`test`.`t2`), use_index(@`sel_1` `test`.`t1` `c_d_e`), no_order_index(@`sel_1` `test`.`t1` `c_d_e`), use_index(@`sel_1` `test`.`t2` `f`), no_order_index(@`sel_1` `test`.`t2` `f`)" }, { "SQL": "select /*+ USE_INDEX(t, c_d_e, f, g) */ * from t order by f", @@ -2387,25 +2387,25 @@ "SQL": "select /*+ TIDB_INLJ(t1) */ t1.a, t2.a, t3.a from t t1, t t2, t t3 where t1.a = t2.a and t2.a = t3.a;", "Best": "MergeInnerJoin{IndexJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.a)->TableReader(Table(t))}(test.t.a,test.t.a)", "Warning": "", - "Hints": "merge_join(@`sel_1` `test`.`t3`), inl_join(@`sel_1` `test`.`t1`), use_index(@`sel_1` `test`.`t1` ), no_order_index(@`sel_1` `test`.`t1` `primary`), use_index(@`sel_1` `test`.`t2` ), order_index(@`sel_1` `test`.`t2` `primary`), use_index(@`sel_1` `test`.`t3` ), order_index(@`sel_1` `test`.`t3` `primary`)" + "Hints": "merge_join(`test`.`t3`), leading(`test`.`t1`, `test`.`t2`, `test`.`t3`), inl_join(`test`.`t1`), use_index(@`sel_1` `test`.`t1` ), no_order_index(@`sel_1` `test`.`t1` `primary`), use_index(@`sel_1` `test`.`t2` ), order_index(@`sel_1` `test`.`t2` `primary`), use_index(@`sel_1` `test`.`t3` ), order_index(@`sel_1` `test`.`t3` `primary`)" }, { "SQL": "select /*+ TIDB_INLJ(test.t1) */ t1.a, t2.a, t3.a from t t1, t t2, t t3 where t1.a = t2.a and t2.a = t3.a;", "Best": "MergeInnerJoin{IndexJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.a)->TableReader(Table(t))}(test.t.a,test.t.a)", "Warning": "", - "Hints": "merge_join(@`sel_1` `test`.`t3`), inl_join(@`sel_1` `test`.`t1`), use_index(@`sel_1` `test`.`t1` ), no_order_index(@`sel_1` `test`.`t1` `primary`), use_index(@`sel_1` `test`.`t2` ), order_index(@`sel_1` `test`.`t2` `primary`), use_index(@`sel_1` `test`.`t3` ), order_index(@`sel_1` `test`.`t3` `primary`)" + "Hints": 
"merge_join(`test`.`t3`), leading(`test`.`t1`, `test`.`t2`, `test`.`t3`), inl_join(`test`.`t1`), use_index(@`sel_1` `test`.`t1` ), no_order_index(@`sel_1` `test`.`t1` `primary`), use_index(@`sel_1` `test`.`t2` ), order_index(@`sel_1` `test`.`t2` `primary`), use_index(@`sel_1` `test`.`t3` ), order_index(@`sel_1` `test`.`t3` `primary`)" }, { "SQL": "select /*+ TIDB_INLJ(t1) */ t1.b, t2.a from t t1, t t2 where t1.b = t2.a;", "Best": "LeftHashJoin{TableReader(Table(t))->IndexReader(Index(t.f)[[NULL,+inf]])}(test.t.b,test.t.a)", "Warning": "[planner:1815]Optimizer Hint /*+ INL_JOIN(t1) */ or /*+ TIDB_INLJ(t1) */ is inapplicable", - "Hints": "hash_join(@`sel_1` `test`.`t1`), use_index(@`sel_1` `test`.`t1` ), no_order_index(@`sel_1` `test`.`t1` `primary`), use_index(@`sel_1` `test`.`t2` `f`), no_order_index(@`sel_1` `test`.`t2` `f`)" + "Hints": "hash_join_build(`test`.`t2`), use_index(@`sel_1` `test`.`t1` ), no_order_index(@`sel_1` `test`.`t1` `primary`), use_index(@`sel_1` `test`.`t2` `f`), no_order_index(@`sel_1` `test`.`t2` `f`)" }, { "SQL": "select /*+ TIDB_INLJ(t2) */ t1.b, t2.a from t2 t1, t2 t2 where t1.b=t2.b and t2.c=-1;", "Best": "IndexJoin{TableReader(Table(t2)->Sel([eq(test.t2.c, -1)]))->IndexReader(Index(t2.b)[[NULL,NULL]])}(test.t2.b,test.t2.b)->Projection", "Warning": "[planner:1815]Optimizer Hint /*+ INL_JOIN(t2) */ or /*+ TIDB_INLJ(t2) */ is inapplicable", - "Hints": "inl_join(@`sel_1` `test`.`t1`), use_index(@`sel_1` `test`.`t2` ), no_order_index(@`sel_1` `test`.`t2` `primary`), use_index(@`sel_1` `test`.`t1` `b`), no_order_index(@`sel_1` `test`.`t1` `b`)" + "Hints": "inl_join(`test`.`t1`), use_index(@`sel_1` `test`.`t2` ), no_order_index(@`sel_1` `test`.`t2` `primary`), use_index(@`sel_1` `test`.`t1` `b`), no_order_index(@`sel_1` `test`.`t1` `b`)" } ] }, @@ -2525,7 +2525,7 @@ { "SQL": "select /*+ HASH_JOIN(@sel_2 t1@sel_2, t2@sel_2), MERGE_JOIN(@sel_1 t1@sel_1, t2@sel_1) */ * from (select t1.a, t1.b from t t1, t t2 where t1.a = t2.a) t1, t t2 where t1.b 
= t2.b", "Plan": "MergeInnerJoin{TableReader(Table(t))->Sort->LeftHashJoin{TableReader(Table(t))->IndexReader(Index(t.f)[[NULL,+inf]])}(test.t.a,test.t.a)->Sort}(test.t.b,test.t.b)->Projection", - "Hints": "use_index(@`sel_1` `test`.`t2` ), no_order_index(@`sel_1` `test`.`t2` `primary`), hash_join(@`sel_2` `test`.`t1`), use_index(@`sel_2` `test`.`t1` ), no_order_index(@`sel_2` `test`.`t1` `primary`), use_index(@`sel_2` `test`.`t2` `f`), no_order_index(@`sel_2` `test`.`t2` `f`)" + "Hints": "use_index(@`sel_1` `test`.`t2` ), no_order_index(@`sel_1` `test`.`t2` `primary`), hash_join_build(@`sel_2` `test`.`t2`@`sel_2`), use_index(@`sel_2` `test`.`t1` ), no_order_index(@`sel_2` `test`.`t1` `primary`), use_index(@`sel_2` `test`.`t2` `f`), no_order_index(@`sel_2` `test`.`t2` `f`)" } ] }, @@ -2658,52 +2658,52 @@ { "SQL": "select /*+ HASH_JOIN(t1) */ t1.b, t2.b from t1, t2 where t1.a = t2.a;", "Plan": "LeftHashJoin{TableReader(Table(t1)->Sel([not(isnull(test.t1.a))]))->TableReader(Table(t2)->Sel([not(isnull(test.t2.a))]))}(test.t1.a,test.t2.a)", - "Hints": "hash_join(@`sel_1` `test`.`t1`), use_index(@`sel_1` `test`.`t1` ), use_index(@`sel_1` `test`.`t2` )" + "Hints": "hash_join_build(`test`.`t2`), use_index(@`sel_1` `test`.`t1` ), use_index(@`sel_1` `test`.`t2` )" }, { "SQL": "select /*+ HASH_JOIN(t1) */ t1.b, t2.b from t1 inner join t2 on t1.a = t2.a;", "Plan": "LeftHashJoin{TableReader(Table(t1)->Sel([not(isnull(test.t1.a))]))->TableReader(Table(t2)->Sel([not(isnull(test.t2.a))]))}(test.t1.a,test.t2.a)", - "Hints": "hash_join(@`sel_1` `test`.`t1`), use_index(@`sel_1` `test`.`t1` ), use_index(@`sel_1` `test`.`t2` )" + "Hints": "hash_join_build(`test`.`t2`), use_index(@`sel_1` `test`.`t1` ), use_index(@`sel_1` `test`.`t2` )" }, { "SQL": "select /*+ HASH_JOIN(t1) */ t1.b, t2.b from t1 left outer join t2 on t1.a = t2.a;", "Plan": "LeftHashJoin{TableReader(Table(t1))->TableReader(Table(t2)->Sel([not(isnull(test.t2.a))]))}(test.t1.a,test.t2.a)", - "Hints": "hash_join(@`sel_1` 
`test`.`t1`), use_index(@`sel_1` `test`.`t1` ), use_index(@`sel_1` `test`.`t2` )" + "Hints": "hash_join_build(`test`.`t2`), use_index(@`sel_1` `test`.`t1` ), use_index(@`sel_1` `test`.`t2` )" }, { "SQL": "select /*+ HASH_JOIN(t1) */ t1.b, t2.b from t1 right outer join t2 on t1.a = t2.a;", "Plan": "RightHashJoin{TableReader(Table(t1)->Sel([not(isnull(test.t1.a))]))->TableReader(Table(t2))}(test.t1.a,test.t2.a)", - "Hints": "hash_join(@`sel_1` `test`.`t1`), use_index(@`sel_1` `test`.`t1` ), use_index(@`sel_1` `test`.`t2` )" + "Hints": "hash_join_build(`test`.`t1`), use_index(@`sel_1` `test`.`t1` ), use_index(@`sel_1` `test`.`t2` )" }, { "SQL": "select 1 from (select /*+ HASH_JOIN(t1) */ t1.a in (select t2.a from t2) from t1) x;", "Plan": "LeftHashJoin{IndexReader(Index(t1.idx_a)[[NULL,+inf]])->IndexReader(Index(t2.idx_a)[[NULL,+inf]])}->Projection", - "Hints": "hash_join(@`sel_2` `test`.`t1`), use_index(@`sel_2` `test`.`t1` `idx_a`), no_order_index(@`sel_2` `test`.`t1` `idx_a`), use_index(@`sel_3` `test`.`t2` `idx_a`), no_order_index(@`sel_3` `test`.`t2` `idx_a`)" + "Hints": "hash_join(@`sel_2` `test`.`t1`@`sel_2`), use_index(@`sel_2` `test`.`t1` `idx_a`), no_order_index(@`sel_2` `test`.`t1` `idx_a`), use_index(@`sel_3` `test`.`t2` `idx_a`), no_order_index(@`sel_3` `test`.`t2` `idx_a`)" }, { "SQL": "select 1 from (select /*+ HASH_JOIN(t1) */ t1.a not in (select t2.a from t2) from t1) x;", "Plan": "LeftHashJoin{IndexReader(Index(t1.idx_a)[[NULL,+inf]])->IndexReader(Index(t2.idx_a)[[NULL,+inf]])}->Projection", - "Hints": "hash_join(@`sel_2` `test`.`t1`), use_index(@`sel_2` `test`.`t1` `idx_a`), no_order_index(@`sel_2` `test`.`t1` `idx_a`), use_index(@`sel_3` `test`.`t2` `idx_a`), no_order_index(@`sel_3` `test`.`t2` `idx_a`)" + "Hints": "hash_join(@`sel_2` `test`.`t1`@`sel_2`), use_index(@`sel_2` `test`.`t1` `idx_a`), no_order_index(@`sel_2` `test`.`t1` `idx_a`), use_index(@`sel_3` `test`.`t2` `idx_a`), no_order_index(@`sel_3` `test`.`t2` `idx_a`)" }, { "SQL": "select 
/*+ INL_JOIN(t1) */ t1.b, t2.b from t1 inner join t2 on t1.a = t2.a;", "Plan": "IndexJoin{IndexLookUp(Index(t1.idx_a)[[NULL,NULL]]->Sel([not(isnull(test.t1.a))]), Table(t1))->TableReader(Table(t2)->Sel([not(isnull(test.t2.a))]))}(test.t2.a,test.t1.a)", - "Hints": "inl_join(@`sel_1` `test`.`t1`), use_index(@`sel_1` `test`.`t1` `idx_a`), no_order_index(@`sel_1` `test`.`t1` `idx_a`), use_index(@`sel_1` `test`.`t2` )" + "Hints": "inl_join(`test`.`t1`), use_index(@`sel_1` `test`.`t1` `idx_a`), no_order_index(@`sel_1` `test`.`t1` `idx_a`), use_index(@`sel_1` `test`.`t2` )" }, { "SQL": "select /*+ INL_HASH_JOIN(t1) */ t1.b, t2.b from t1 inner join t2 on t1.a = t2.a;", "Plan": "IndexHashJoin{IndexLookUp(Index(t1.idx_a)[[NULL,NULL]]->Sel([not(isnull(test.t1.a))]), Table(t1))->TableReader(Table(t2)->Sel([not(isnull(test.t2.a))]))}(test.t2.a,test.t1.a)", - "Hints": "inl_hash_join(@`sel_1` `test`.`t1`), use_index(@`sel_1` `test`.`t1` `idx_a`), no_order_index(@`sel_1` `test`.`t1` `idx_a`), use_index(@`sel_1` `test`.`t2` )" + "Hints": "inl_hash_join(`test`.`t1`), use_index(@`sel_1` `test`.`t1` `idx_a`), no_order_index(@`sel_1` `test`.`t1` `idx_a`), use_index(@`sel_1` `test`.`t2` )" }, { "SQL": "select /*+ INL_MERGE_JOIN(t1) */ t1.b, t2.b from t1 inner join t2 on t1.a = t2.a;", "Plan": "LeftHashJoin{TableReader(Table(t1)->Sel([not(isnull(test.t1.a))]))->TableReader(Table(t2)->Sel([not(isnull(test.t2.a))]))}(test.t1.a,test.t2.a)", - "Hints": "hash_join(@`sel_1` `test`.`t1`), use_index(@`sel_1` `test`.`t1` ), use_index(@`sel_1` `test`.`t2` )" + "Hints": "hash_join_build(`test`.`t2`), use_index(@`sel_1` `test`.`t1` ), use_index(@`sel_1` `test`.`t2` )" }, { "SQL": "select /*+ MERGE_JOIN(t1) */ t1.b, t2.b from t1 inner join t2 on t1.a = t2.a;", "Plan": "MergeInnerJoin{IndexLookUp(Index(t1.idx_a)[[-inf,+inf]], Table(t1))->Projection->IndexLookUp(Index(t2.idx_a)[[-inf,+inf]], Table(t2))->Projection}(test.t1.a,test.t2.a)", - "Hints": "merge_join(@`sel_1` `test`.`t1`), use_index(@`sel_1` 
`test`.`t1` `idx_a`), order_index(@`sel_1` `test`.`t1` `idx_a`), use_index(@`sel_1` `test`.`t2` `idx_a`), order_index(@`sel_1` `test`.`t2` `idx_a`)" + "Hints": "merge_join(`test`.`t1`), use_index(@`sel_1` `test`.`t1` `idx_a`), order_index(@`sel_1` `test`.`t1` `idx_a`), use_index(@`sel_1` `test`.`t2` `idx_a`), order_index(@`sel_1` `test`.`t2` `idx_a`)" } ] }, diff --git a/pkg/planner/core/hint_utils.go b/pkg/planner/core/hint_utils.go index 2c81a8d9f6591..c4fa9bcd8ba40 100644 --- a/pkg/planner/core/hint_utils.go +++ b/pkg/planner/core/hint_utils.go @@ -15,6 +15,8 @@ package core import ( + "slices" + "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/parser/model" @@ -39,9 +41,18 @@ func GenHintsFromFlatPlan(flat *FlatPhysicalPlan) []*ast.TableOptimizerHint { if len(selectPlan) == 0 || !selectPlan[0].IsPhysicalPlan { return nil } + // To generate leading hint, we need to extract join group from the plan tree by traversing children of PhysicalJoin + // operators. We use this map to avoid revisiting the same operator during this process. 
+ visitedPhysicalJoinIDs := make(map[int]struct{}) for _, fop := range selectPlan { p := fop.Origin.(base.PhysicalPlan) hints = genHintsFromSingle(p, nodeTp, fop.StoreType, hints) + if join, ok := p.(PhysicalJoin); ok { + joinOrderHint := genJoinOrderHintFromRootPhysicalJoin(join, visitedPhysicalJoinIDs, nodeTp) + if joinOrderHint != nil { + hints = append(hints, joinOrderHint) + } + } } for _, cte := range flat.CTEs { for i, fop := range cte { @@ -50,6 +61,12 @@ func GenHintsFromFlatPlan(flat *FlatPhysicalPlan) []*ast.TableOptimizerHint { } p := fop.Origin.(base.PhysicalPlan) hints = genHintsFromSingle(p, nodeTp, fop.StoreType, hints) + if join, ok := p.(PhysicalJoin); ok { + joinOrderHint := genJoinOrderHintFromRootPhysicalJoin(join, visitedPhysicalJoinIDs, nodeTp) + if joinOrderHint != nil { + hints = append(hints, joinOrderHint) + } + } } } return h.RemoveDuplicatedHints(hints) @@ -184,28 +201,103 @@ func genHintsFromSingle(p base.PhysicalPlan, nodeType h.NodeType, storeType kv.S }) } case *PhysicalMergeJoin: - hint := genJoinMethodHintForSinglePhysicalJoin(p.SCtx(), h.HintSMJ, p.QueryBlockOffset(), nodeType, pp.Children()...) + hint := genJoinMethodHintForSinglePhysicalJoin( + p.SCtx(), + h.HintSMJ, + p.QueryBlockOffset(), + nodeType, + false, + pp.Children()..., + ) if hint != nil { res = append(res, hint) } case *PhysicalHashJoin: - // TODO: support the hash_join_build and hash_join_probe hint for auto capture - hint := genJoinMethodHintForSinglePhysicalJoin(p.SCtx(), h.HintHJ, p.QueryBlockOffset(), nodeType, pp.Children()...) + // For semi join, hash_join_[build|probe] is not supported. See getHashJoins() for details. 
+ if pp.JoinType.IsSemiJoin() { + hint := genJoinMethodHintForSinglePhysicalJoin( + p.SCtx(), + h.HintHJ, + p.QueryBlockOffset(), + nodeType, + false, + pp.Children()..., + ) + if hint != nil { + res = append(res, hint) + } + break + } + var buildSideChild, probeSideChild base.PhysicalPlan + if pp.RightIsBuildSide() { + buildSideChild = pp.Children()[1] + probeSideChild = pp.Children()[0] + } else { + buildSideChild = pp.Children()[0] + probeSideChild = pp.Children()[1] + } + hint := genJoinMethodHintForSinglePhysicalJoin( + p.SCtx(), + h.HintHashJoinBuild, + p.QueryBlockOffset(), + nodeType, + true, + buildSideChild, + probeSideChild, + ) if hint != nil { res = append(res, hint) + } else { + // In case we failed to generate the hint for build side, we try to generate the hint for probe side. + hint := genJoinMethodHintForSinglePhysicalJoin( + p.SCtx(), + h.HintHashJoinProbe, + p.QueryBlockOffset(), + nodeType, + true, + probeSideChild, + buildSideChild, + ) + if hint != nil { + res = append(res, hint) + } } case *PhysicalIndexJoin: - hint := genJoinMethodHintForSinglePhysicalJoin(p.SCtx(), h.HintINLJ, p.QueryBlockOffset(), nodeType, pp.Children()[pp.InnerChildIdx]) + hint := genJoinMethodHintForSinglePhysicalJoin( + p.SCtx(), + h.HintINLJ, + p.QueryBlockOffset(), + nodeType, + true, + pp.Children()[pp.InnerChildIdx], + pp.Children()[1-pp.InnerChildIdx], + ) if hint != nil { res = append(res, hint) } case *PhysicalIndexMergeJoin: - hint := genJoinMethodHintForSinglePhysicalJoin(p.SCtx(), h.HintINLMJ, p.QueryBlockOffset(), nodeType, pp.Children()[pp.InnerChildIdx]) + hint := genJoinMethodHintForSinglePhysicalJoin( + p.SCtx(), + h.HintINLMJ, + p.QueryBlockOffset(), + nodeType, + true, + pp.Children()[pp.InnerChildIdx], + pp.Children()[1-pp.InnerChildIdx], + ) if hint != nil { res = append(res, hint) } case *PhysicalIndexHashJoin: - hint := genJoinMethodHintForSinglePhysicalJoin(p.SCtx(), h.HintINLHJ, p.QueryBlockOffset(), nodeType, pp.Children()[pp.InnerChildIdx]) + 
hint := genJoinMethodHintForSinglePhysicalJoin( + p.SCtx(), + h.HintINLHJ, + p.QueryBlockOffset(), + nodeType, + true, + pp.Children()[pp.InnerChildIdx], + pp.Children()[1-pp.InnerChildIdx], + ) if hint != nil { res = append(res, hint) } @@ -220,59 +312,186 @@ func getTableName(tblName model.CIStr, asName *model.CIStr) model.CIStr { return tblName } -func genJoinMethodHintForSinglePhysicalJoin(sctx base.PlanContext, joinType string, parentOffset int, nodeType h.NodeType, children ...base.PhysicalPlan) (res *ast.TableOptimizerHint) { - if parentOffset == -1 { - return res +// genJoinMethodHintForSinglePhysicalJoin is the entry point of generating join method hint. +// It generates a join method hint for a single physical join operator according to the input joinType. +// Both children of the Join should be passed in as the children arguments, this is for correctly deriving the QB offset +// for the hint. +// For hints like merge_join(), we can generate hint using table name of any one of the two tables. But for hints like +// hash_join_build() and inl_join(), we want to generate hint using table name of a specific side. For this difference, +// we introduce the onlyFirstTbl argument. If onlyFirstTbl is true, we only try to generate hint using the table name of +// the children[0]. 
+func genJoinMethodHintForSinglePhysicalJoin( + sctx base.PlanContext, + joinType string, + parentQBOffset int, + nodeType h.NodeType, + onlyFirstTbl bool, + children ...base.PhysicalPlan, +) *ast.TableOptimizerHint { + if parentQBOffset == -1 { + return nil + } + hintTbls, hintQBName := genHintTblForJoinNodes(sctx, children, parentQBOffset, nodeType) + effectiveHintTbls := slices.DeleteFunc(slices.Clone(hintTbls), func(ht *ast.HintTable) bool { + return ht == nil + }) + if len(effectiveHintTbls) == 0 { + return nil + } + + if onlyFirstTbl && hintTbls[0] == nil { + return nil + } + + newHint := &ast.TableOptimizerHint{ + HintName: model.NewCIStr(joinType), + Tables: []ast.HintTable{*effectiveHintTbls[0]}, + } + + if hintQBName != nil { + newHint.QBName = *hintQBName } - for _, child := range children { - qbOffset, ht := extractHintTableForJoinNode(sctx, child, parentOffset) + + return newHint +} + +// genHintTblForJoinNodes tries to generate ast.HintTable for each join node, and the QB name for the hint itself. +// (Join node here means the operators that are joined, not the Join operator itself) +// If the return values are not (nil,nil), len(hintTbls) should be equal to len(joinedNodes). The invalid ones in the +// returned hintTbls slice will be nil. +// The hintQBNamePtr will be nil if it's not needed, or we failed to generate one. +func genHintTblForJoinNodes( + sctx base.PlanContext, + joinedNodes []base.PhysicalPlan, + parentQBOffset int, + nodeType h.NodeType, +) (hintTbls []*ast.HintTable, hintQBNamePtr *model.CIStr) { + // 1. Use genHintTblForSingleJoinNode() to generate QB offset and table name for each join node. + + // Note that if we failed to generate valid information for one element in joinedNodes, we append -1 and nil instead + // of skipping. + // So qbOffsets[x] is -1 if and only if hintTbls[x] is nil; + // and qbOffsets[x] >=0 if and only if hintTbls[x] is not nil. 
+ hintTbls = make([]*ast.HintTable, 0, len(joinedNodes)) + qbOffsets := make([]int, 0, len(joinedNodes)) + guessQBOffsets := make(map[int]struct{}) + for _, plan := range joinedNodes { + qbOffset, guessOffset, ht := genHintTblForSingleJoinNode(sctx, plan, parentQBOffset) if qbOffset < 0 || ht == nil { + qbOffsets = append(qbOffsets, -1) + hintTbls = append(hintTbls, nil) + continue + } + // If we guessed the same QB offset for two different nodes, that's likely incorrect, and we stop using it. + // This may happen for queries like ... FROM t1 join (select * from t2 join t3) derived ... . We will guess + // derived@sel_1 for both t2 and t3, and that's incorrect. Besides, the current leading hint also can't handle this + // kind of hint. + if guessOffset { + if _, ok := guessQBOffsets[qbOffset]; ok { + qbOffsets = append(qbOffsets, -1) + hintTbls = append(hintTbls, nil) + continue + } + guessQBOffsets[qbOffset] = struct{}{} + } + qbOffsets = append(qbOffsets, qbOffset) + hintTbls = append(hintTbls, ht) + } + + // 2. Add QB name for each table name in the hint. + + for i, hintTbl := range hintTbls { + if hintTbl == nil { continue } - qbName, err := h.GenerateQBName(nodeType, qbOffset) + // In quick binding, we always put the generated hints in the first valid place in the SQL. + // That implies hintname(@del_1) and hintname(@upd_1) are unnecessary in UPDATE/DELETE statements, and + // hintname(@sel_1) is unnecessary in SELECT statements. + // We don't generate QB name for the table names in the hint in this case to make the result cleaner. + if (qbOffsets[i] <= 1 && nodeType == h.TypeSelect) || + (qbOffsets[i] == 0 && (nodeType == h.TypeUpdate || nodeType == h.TypeDelete)) { + continue + } + tblQBName, err := h.GenerateQBName(nodeType, qbOffsets[i]) if err != nil { continue } - return &ast.TableOptimizerHint{ - QBName: qbName, - HintName: model.NewCIStr(joinType), - Tables: []ast.HintTable{*ht}, + hintTbls[i].QBName = tblQBName + } + + // 3. 
Generate QB name for the hint itself based on the QB name of each join node from step 1. + + // Current join reorder will break QB offset of the join operator, e.g. setting them to -1. + // So we are unable to get the correct QB offset for the hint from the join operator, now we use the minimum QB + // offset among the tables. + // Besides, genHintTblForSingleJoinNode() is not powerful enough to handle all cases, it may fail in some cases. + // If we failed to get QB offset information from one join node, we don't generate QB name for the hint. Because + // that may cause a wrong QB offset, leaving it blank is probably better. + if slices.Contains(qbOffsets, -1) { + return hintTbls, nil + } + minQBOffset := slices.Min(qbOffsets) + + // ditto. We don't generate unnecessary QB name for the hint itself. + if (minQBOffset > 1 && nodeType == h.TypeSelect) || + (minQBOffset > 0 && (nodeType == h.TypeUpdate || nodeType == h.TypeDelete)) { + hintQBName, err := h.GenerateQBName(nodeType, minQBOffset) + if err != nil { + return nil, nil } + hintQBNamePtr = &hintQBName } - return res + return hintTbls, hintQBNamePtr } -func extractHintTableForJoinNode( +// genHintTblForSingleJoinNode tries to generate ast.HintTable and QB offset for a single join node. +// See the comments inside about the meaning of guessQBOffset. +func genHintTblForSingleJoinNode( sctx base.PlanContext, joinNode base.PhysicalPlan, parentOffset int, ) ( qbOffset int, + guessQBOffset bool, ht *ast.HintTable, ) { - qbOffset = joinNode.QueryBlockOffset() + selfOffset := joinNode.QueryBlockOffset() + qbOffset = selfOffset if qbOffset == -1 { - return -1, nil + return -1, false, nil } + guessQBOffset = false var dbName, tableName *model.CIStr + // For sub-queries like `(select * from t) t1`, t1 should belong to its surrounding select block. 
if qbOffset != parentOffset { var blockAsNames []ast.HintTable if p := sctx.GetSessionVars().PlannerSelectBlockAsName.Load(); p != nil { blockAsNames = *p } if qbOffset >= len(blockAsNames) { - return -1, nil + return -1, false, nil } hintTable := blockAsNames[qbOffset] - // For sub-queries like `(select * from t) t1`, t1 should belong to its surrounding select block. dbName, tableName, qbOffset = &hintTable.DBName, &hintTable.TableName, parentOffset - } else { + // Current join reorder will break QB offset of the join operator by setting them to -1. In this case, we will + // get qbOffset == parentOffset == -1 when it comes here. + // For this case, we add a temporary fix to guess the QB offset based on the parent offset. The idea is simple, + // for the example above, we can easily notice that the QBOffset(t1) = QBOffset(t) - 1. This is not always true, + // but it works in simple cases. + if selfOffset > 1 && qbOffset == -1 { + guessQBOffset = true + qbOffset = selfOffset - 1 + } + } + if tableName == nil || tableName.L == "" { + guessQBOffset = false + qbOffset = joinNode.QueryBlockOffset() dbName, tableName = extractTableAsName(joinNode) } if tableName == nil || tableName.L == "" { - return -1, nil + return -1, false, nil } - return qbOffset, &ast.HintTable{DBName: *dbName, TableName: *tableName} + return qbOffset, guessQBOffset, &ast.HintTable{DBName: *dbName, TableName: *tableName} } func extractTableAsName(p base.PhysicalPlan) (*model.CIStr, *model.CIStr) { @@ -282,24 +501,121 @@ func extractTableAsName(p base.PhysicalPlan) (*model.CIStr, *model.CIStr) { switch x := p.(type) { case *PhysicalTableReader: ts := x.TablePlans[0].(*PhysicalTableScan) - if ts.TableAsName.L != "" { + if ts.TableAsName != nil && ts.TableAsName.L != "" { return &ts.DBName, ts.TableAsName } return &ts.DBName, &ts.Table.Name case *PhysicalIndexReader: is := x.IndexPlans[0].(*PhysicalIndexScan) - if is.TableAsName.L != "" { + if is.TableAsName != nil && is.TableAsName.L != "" { return 
&is.DBName, is.TableAsName } return &is.DBName, &is.Table.Name case *PhysicalIndexLookUpReader: is := x.IndexPlans[0].(*PhysicalIndexScan) - if is.TableAsName.L != "" { + if is.TableAsName != nil && is.TableAsName.L != "" { return &is.DBName, is.TableAsName } return &is.DBName, &is.Table.Name - case *PhysicalSort, *PhysicalSelection, *PhysicalUnionScan, *PhysicalProjection: + case *PhysicalSort, *PhysicalSelection, *PhysicalUnionScan, *PhysicalProjection, + *PhysicalHashAgg, *PhysicalStreamAgg: return extractTableAsName(p.Children()[0]) } return nil, nil } + +// genJoinOrderHintFromRootPhysicalJoin is the entry point of generating join order hint. +func genJoinOrderHintFromRootPhysicalJoin( + p PhysicalJoin, + visitedIDs map[int]struct{}, + nodeType h.NodeType, +) *ast.TableOptimizerHint { + if _, visited := visitedIDs[p.ID()]; visited { + return nil + } + + // 1. Get the joined operators in this join group with correct order in the slice. + orderedJoinGroup := extractOrderedPhysicalJoinGroup(p, visitedIDs, 1) + // If it only involves two tables, we don't need to generate the join order hint. + if len(orderedJoinGroup) <= 2 { + return nil + } + + // 2. Generate the leading hint based on the ordered join nodes. + hintTbls, hintQBName := genHintTblForJoinNodes(p.SCtx(), orderedJoinGroup, p.QueryBlockOffset(), nodeType) + + // For now, we generate the leading hint only if we successfully generate the names for all nodes. + if slices.Contains(hintTbls, nil) { + return nil + } + + hintTblVals := make([]ast.HintTable, 0, len(hintTbls)) + for _, ht := range hintTbls { + hintTblVals = append(hintTblVals, *ht) + } + res := &ast.TableOptimizerHint{ + HintName: model.NewCIStr(h.HintLeading), + Tables: hintTblVals, + } + if hintQBName != nil { + res.QBName = *hintQBName + } + return res +} + +func extractOrderedPhysicalJoinGroup(p PhysicalJoin, visitedIDs map[int]struct{}, depth uint) []base.PhysicalPlan { + visitedIDs[p.ID()] = struct{}{} + + // 1. 
sanity checks + + // In our join reorder implementation, cartesian join will break the join relationship and make its two children + // two independent join groups. So we don't need to handle it here. + // Currently, index joins must match the index or PK of the inner table, so cartesian join must be a hash join. + if hashJoin, ok := p.(*PhysicalHashJoin); ok { + if len(hashJoin.EqualConditions) == 0 && len(hashJoin.NAEqualConditions) == 0 { + return nil + } + } + + jt := p.GetJoinType() + // They are the only join types supported by current join reorder. + if jt != InnerJoin && jt != LeftOuterJoin && jt != RightOuterJoin { + return nil + } + + // 2. Extract information from children according to whether the child is another Join, then construct the ordered + // join group and return. + + var child0IsJoin, child1IsJoin bool + var childJoin PhysicalJoin + var childJoinGroup []base.PhysicalPlan + if childJoin, child0IsJoin = p.Children()[0].(PhysicalJoin); child0IsJoin { + childJoinGroup = extractOrderedPhysicalJoinGroup(childJoin, visitedIDs, depth+1) + } + if childJoin, child1IsJoin = p.Children()[1].(PhysicalJoin); child1IsJoin { + childJoinGroup = extractOrderedPhysicalJoinGroup(childJoin, visitedIDs, depth+1) + } + + // case 1 - bushy join: not supported now, also should not appear now + if child0IsJoin && child1IsJoin { + return nil + } + // case 2 - leaf join operator: initialize the join group with the two children + if !child0IsJoin && !child1IsJoin { + // preallocate the slice based on the number of join operators to avoid reallocations + orderedJoinGroup := make([]base.PhysicalPlan, 0, depth+1) + orderedJoinGroup = append(orderedJoinGroup, p.Children()[0], p.Children()[1]) + return orderedJoinGroup + } + // case 3 - non-leaf join operator: append the non-join child to the join group from the Join child + if len(childJoinGroup) < 2 { + return nil + } + var orderedJoinGroup []base.PhysicalPlan + if child0IsJoin { + orderedJoinGroup = append(childJoinGroup, 
p.Children()[1]) + } else { + orderedJoinGroup = append(childJoinGroup, p.Children()[0]) + } + return orderedJoinGroup +} diff --git a/tests/integrationtest/r/planner/core/physical_plan.result b/tests/integrationtest/r/planner/core/physical_plan.result index d5a60e8f4608c..362fc85f0dd8c 100644 --- a/tests/integrationtest/r/planner/core/physical_plan.result +++ b/tests/integrationtest/r/planner/core/physical_plan.result @@ -2,13 +2,13 @@ drop table if exists t; create table t(a int, b int, c int, key(b), key(c)); explain format='hint' select /*+ inl_merge_join(t2) */ * from t t1 inner join t t2 on t1.b = t2.b and t1.c = 1; hint -inl_hash_join(@`sel_1` `planner__core__physical_plan`.`t2`), use_index(@`sel_1` `planner__core__physical_plan`.`t1` `c`), no_order_index(@`sel_1` `planner__core__physical_plan`.`t1` `c`), use_index(@`sel_1` `planner__core__physical_plan`.`t2` `b`), no_order_index(@`sel_1` `planner__core__physical_plan`.`t2` `b`), inl_merge_join(`t2`) +inl_hash_join(`planner__core__physical_plan`.`t2`), use_index(@`sel_1` `planner__core__physical_plan`.`t1` `c`), no_order_index(@`sel_1` `planner__core__physical_plan`.`t1` `c`), use_index(@`sel_1` `planner__core__physical_plan`.`t2` `b`), no_order_index(@`sel_1` `planner__core__physical_plan`.`t2` `b`), inl_merge_join(`t2`) show warnings; Level Code Message Warning 1815 The INDEX MERGE JOIN hint is deprecated for usage, try other hints. 
explain format='hint' select /*+ inl_hash_join(t2) */ * from t t1 inner join t t2 on t1.b = t2.b and t1.c = 1; hint -inl_hash_join(@`sel_1` `planner__core__physical_plan`.`t2`), use_index(@`sel_1` `planner__core__physical_plan`.`t1` `c`), no_order_index(@`sel_1` `planner__core__physical_plan`.`t1` `c`), use_index(@`sel_1` `planner__core__physical_plan`.`t2` `b`), no_order_index(@`sel_1` `planner__core__physical_plan`.`t2` `b`), inl_hash_join(`t2`) +inl_hash_join(`planner__core__physical_plan`.`t2`), use_index(@`sel_1` `planner__core__physical_plan`.`t1` `c`), no_order_index(@`sel_1` `planner__core__physical_plan`.`t1` `c`), use_index(@`sel_1` `planner__core__physical_plan`.`t2` `b`), no_order_index(@`sel_1` `planner__core__physical_plan`.`t2` `b`), inl_hash_join(`t2`) drop table if exists t; create table t(a int, b int, c int, index idx_a(a)); select extract(day_hour from 'ziy') as res from t order by res limit 1;