From 3e2185f57c6da9ba76baf73592c312184a420c08 Mon Sep 17 00:00:00 2001
From: Wish
Date: Wed, 8 Jan 2025 22:47:33 +0900
Subject: [PATCH] planner: Fix vector not truncated after CBO

Signed-off-by: Wish
---
 pkg/expression/integration_test/BUILD.bazel   |  1 +
 .../integration_test/integration_test.go      | 77 +++++++++++++++++++
 pkg/planner/core/explain.go                   |  2 +-
 3 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/pkg/expression/integration_test/BUILD.bazel b/pkg/expression/integration_test/BUILD.bazel
index 139264b797cca..e101be7024b28 100644
--- a/pkg/expression/integration_test/BUILD.bazel
+++ b/pkg/expression/integration_test/BUILD.bazel
@@ -12,6 +12,7 @@ go_test(
     deps = [
         "//pkg/config",
         "//pkg/domain",
+        "//pkg/domain/infosync",
         "//pkg/errno",
         "//pkg/expression",
         "//pkg/kv",
diff --git a/pkg/expression/integration_test/integration_test.go b/pkg/expression/integration_test/integration_test.go
index 941f6726e5054..42e9ddfef169f 100644
--- a/pkg/expression/integration_test/integration_test.go
+++ b/pkg/expression/integration_test/integration_test.go
@@ -33,6 +33,7 @@ import (
 	"github.com/pingcap/failpoint"
 	"github.com/pingcap/tidb/pkg/config"
 	"github.com/pingcap/tidb/pkg/domain"
+	"github.com/pingcap/tidb/pkg/domain/infosync"
 	"github.com/pingcap/tidb/pkg/errno"
 	"github.com/pingcap/tidb/pkg/expression"
 	"github.com/pingcap/tidb/pkg/kv"
@@ -322,6 +323,26 @@ func TestVectorColumnInfo(t *testing.T) {
 	tk.MustGetErrMsg("create table t(embedding VECTOR(16384))", "vector cannot have more than 16383 dimensions")
 }
 
+// TestVectorExplainTruncate checks that EXPLAIN prints vector literal arguments
+// of VEC_COSINE_DISTANCE in shortened form (e.g. 12345 is shown as 1.2e+04).
+func TestVectorExplainTruncate(t *testing.T) {
+	store := testkit.CreateMockStore(t)
+	tk := testkit.NewTestKit(t, store)
+	tk.MustExec("use test")
+	tk.MustExec("CREATE TABLE t(c VECTOR);")
+
+	// TODO: The output can be improved.
+	tk.MustQuery(`EXPLAIN format='brief' SELECT
+		VEC_COSINE_DISTANCE(c, '[3,100,12345,10000]'),
+		VEC_COSINE_DISTANCE(c, '[11111111111,11111111111.23456789,3.1,5.12456]'),
+		VEC_COSINE_DISTANCE(c, '[-11111111111,-11111111111.23456789,-3.1,-5.12456]')
+	FROM t;`).Check(testkit.Rows(
+		`Projection 10000.00 root vec_cosine_distance(test.t.c, [3,1e+02,1.2e+04,1e+04])->Column#3, vec_cosine_distance(test.t.c, [1.1e+10,1.1e+10,3.1,5.1])->Column#4, vec_cosine_distance(test.t.c, [-1.1e+10,-1.1e+10,-3.1,-5.1])->Column#5`,
+		`└─TableReader 10000.00 root data:TableFullScan`,
+		` └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo`,
+	))
+}
+
 func TestVectorConstantExplain(t *testing.T) {
 	store := testkit.CreateMockStore(t)
 	tk := testkit.NewTestKit(t, store)
@@ -379,6 +400,62 @@ func TestVectorConstantExplain(t *testing.T) {
 	tk.ResultSetToResult(rs, fmt.Sprintf("%v", rs))
 }
 
+// TestVectorIndexExplain checks that a 100-dimension query vector is abbreviated
+// in both the Projection and the TableFullScan annIndex info of EXPLAIN.
+func TestVectorIndexExplain(t *testing.T) {
+	store := testkit.CreateMockStoreWithSchemaLease(t, 1*time.Second, mockstore.WithMockTiFlash(2))
+
+	tk := testkit.NewTestKit(t, store)
+
+	tiflash := infosync.NewMockTiFlash()
+	infosync.SetMockTiFlash(tiflash)
+	defer func() {
+		tiflash.Lock()
+		tiflash.StatusServer.Close()
+		tiflash.Unlock()
+	}()
+
+	require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/ddl/MockCheckVectorIndexProcess", `return(1)`))
+	defer func() {
+		require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/ddl/MockCheckVectorIndexProcess"))
+	}()
+
+	tk.MustExec("use test")
+	tk.MustExec("drop table if exists t1")
+	tk.MustExec(`
+		create table t1 (
+			vec vector(100)
+		)
+	`)
+	tk.MustExec("alter table t1 set tiflash replica 1;")
+	tk.MustExec("alter table t1 add vector index ((vec_cosine_distance(vec))) USING HNSW;")
+	tbl, err := domain.GetDomain(tk.Session()).InfoSchema().TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t1"))
+	require.NoError(t, err)
+	tbl.Meta().TiFlashReplica = &model.TiFlashReplicaInfo{
+		Count:     1,
+		Available: true,
+	}
+
+	vb := strings.Builder{}
+	vb.WriteString("[")
+	for i := 0; i < 100; i++ {
+		if i > 0 {
+			vb.WriteString(",")
+		}
+		vb.WriteString("100")
+	}
+	vb.WriteString("]")
+
+	tk.MustQuery(fmt.Sprintf("explain format = 'brief' select * from t1 order by vec_cosine_distance(vec, '%s') limit 1", vb.String())).Check(testkit.Rows(
+		`TopN 1.00 root Column#5, offset:0, count:1`,
+		`└─TableReader 1.00 root MppVersion: 2, data:ExchangeSender`,
+		` └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough`,
+		` └─TopN 1.00 mpp[tiflash] Column#5, offset:0, count:1`,
+		` └─Projection 1.00 mpp[tiflash] test.t1.vec, vec_cosine_distance(test.t1.vec, [1e+02,1e+02,1e+02,1e+02,1e+02,(95 more)...])->Column#5`,
+		` └─TableFullScan 1.00 mpp[tiflash] table:t1, index:vector_index(vec) keep order:false, stats:pseudo, annIndex:COSINE(vec..[1e+02,1e+02,1e+02,1e+02,1e+02,(95 more)...], limit:1)`,
+	))
+}
+
 func TestFixedVector(t *testing.T) {
 	store := testkit.CreateMockStore(t)
 	tk := testkit.NewTestKit(t, store)
diff --git a/pkg/planner/core/explain.go b/pkg/planner/core/explain.go
index 191d09f61caee..0ebbfe2fcf73e 100644
--- a/pkg/planner/core/explain.go
+++ b/pkg/planner/core/explain.go
@@ -284,7 +284,7 @@ func (p *PhysicalTableScan) OperatorInfo(normalized bool) string {
 			if err != nil {
 				buffer.WriteString("[?]")
 			} else {
-				buffer.WriteString(v.String())
+				buffer.WriteString(v.TruncatedString())
 			}
 		}
 		buffer.WriteString(", limit:")