diff --git a/Cargo.toml b/Cargo.toml index 803e51c..c900511 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,14 +1,14 @@ [workspace] members = [ - "xngin-datatype", - "xngin-catalog", - "xngin-sql", - "xngin-expr", - "xngin-compute", - "xngin-plan", - "xngin-protocol", - "xngin-storage", - "xngin-server", - "xngin-tpch-tests" + "doradb-datatype", + "doradb-catalog", + "doradb-sql", + "doradb-expr", + "doradb-compute", + "doradb-plan", + "doradb-protocol", + "doradb-storage", + "doradb-server", + "doradb-tpch-tests" ] -exclude = ["xngin-bench"] +exclude = ["doradb-bench"] diff --git a/README.md b/README.md index 4e30fc5..4fdfa11 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,13 @@ -# XNGIN +# EVODB -![build](https://github.com/jiangzhe/xngin/actions/workflows/build.yml/badge.svg) -![codecov](https://codecov.io/gh/jiangzhe/xngin/branch/main/graph/badge.svg?token=T3RMZE2998) +![build](https://github.com/jiangzhe/evodb/actions/workflows/build.yml/badge.svg) +![codecov](https://codecov.io/gh/jiangzhe/evodb/branch/main/graph/badge.svg?token=T3RMZE2998) -Xngin(pronounced "X Engine") is a personal project to build a SQL engine from scratch. - -The project name is inspired by [Nginx](https://en.wikipedia.org/wiki/Nginx), which is a -very popular web server with high performance and ease to use. +doradb is an experimental project to build a memory-optimized disk-based database from scratch. ## Goal -1. Fast. -2. Easy to use. -3. Distributed. +Fast speed on both transactional and analytical processing. 
## License diff --git a/xngin-bench/.gitignore b/doradb-bench/.gitignore similarity index 100% rename from xngin-bench/.gitignore rename to doradb-bench/.gitignore diff --git a/xngin-bench/Cargo.toml b/doradb-bench/Cargo.toml similarity index 73% rename from xngin-bench/Cargo.toml rename to doradb-bench/Cargo.toml index 2dd882c..2b4da62 100644 --- a/xngin-bench/Cargo.toml +++ b/doradb-bench/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "xngin-bench" +name = "doradb-bench" version = "0.1.0" edition = "2021" authors = ["jiangzhe "] @@ -7,12 +7,12 @@ license = "MIT OR Apache-2.0" description = "Benchmarks of X-Engine" keywords = ["database", "benchmark"] categories = ["database"] -repository = "https://github.com/jiangzhe/xngin/xngin-bench/" +repository = "https://github.com/jiangzhe/doradb/doradb-bench/" [dependencies] -xngin-datatype = {version = "0.1.0", path = "../xngin-datatype"} -xngin-compute = {version = "0.1.0", path = "../xngin-compute"} -xngin-storage = {version = "0.1.0", path = "../xngin-storage"} +doradb-datatype = {version = "0.1.0", path = "../doradb-datatype"} +doradb-compute = {version = "0.1.0", path = "../doradb-compute"} +doradb-storage = {version = "0.1.0", path = "../doradb-storage"} criterion = {version = "0.3", features = ["html_reports"]} rand = "0.8.3" pprof = {version = "0.4", features = ["flamegraph", "criterion"]} diff --git a/xngin-bench/benches/bitmap_count.rs b/doradb-bench/benches/bitmap_count.rs similarity index 96% rename from xngin-bench/benches/bitmap_count.rs rename to doradb-bench/benches/bitmap_count.rs index d60a70a..0035672 100644 --- a/xngin-bench/benches/bitmap_count.rs +++ b/doradb-bench/benches/bitmap_count.rs @@ -1,6 +1,6 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rand::{thread_rng, Rng}; -use xngin_storage::bitmap::Bitmap; +use doradb_storage::bitmap::Bitmap; fn bench_count(c: &mut Criterion) { const N1: usize = 10240; diff --git a/xngin-bench/benches/bitmap_extend.rs 
b/doradb-bench/benches/bitmap_extend.rs similarity index 98% rename from xngin-bench/benches/bitmap_extend.rs rename to doradb-bench/benches/bitmap_extend.rs index 4cf873c..6b06e5f 100644 --- a/xngin-bench/benches/bitmap_extend.rs +++ b/doradb-bench/benches/bitmap_extend.rs @@ -1,6 +1,6 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use std::iter::FromIterator; -use xngin_storage::bitmap::Bitmap; +use doradb_storage::bitmap::Bitmap; fn bench_extend(c: &mut Criterion) { for log2_size in [12, 14] { diff --git a/xngin-bench/benches/bitmap_intersect.rs b/doradb-bench/benches/bitmap_intersect.rs similarity index 94% rename from xngin-bench/benches/bitmap_intersect.rs rename to doradb-bench/benches/bitmap_intersect.rs index 9d84dfd..f763026 100644 --- a/xngin-bench/benches/bitmap_intersect.rs +++ b/doradb-bench/benches/bitmap_intersect.rs @@ -1,6 +1,6 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use std::iter::FromIterator; -use xngin_storage::bitmap::Bitmap; +use doradb_storage::bitmap::Bitmap; fn bench_intersect(c: &mut Criterion) { for log2_size in [12, 14] { diff --git a/xngin-bench/benches/bitmap_range_iter.rs b/doradb-bench/benches/bitmap_range_iter.rs similarity index 97% rename from xngin-bench/benches/bitmap_range_iter.rs rename to doradb-bench/benches/bitmap_range_iter.rs index cbbbb56..3e052c3 100644 --- a/xngin-bench/benches/bitmap_range_iter.rs +++ b/doradb-bench/benches/bitmap_range_iter.rs @@ -1,6 +1,6 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rand::{thread_rng, Rng}; -use xngin_storage::bitmap::Bitmap; +use doradb_storage::bitmap::Bitmap; fn bench_range_iter(c: &mut Criterion) { const N: usize = 10240; diff --git a/xngin-bench/benches/bitmap_shift.rs b/doradb-bench/benches/bitmap_shift.rs similarity index 96% rename from xngin-bench/benches/bitmap_shift.rs rename to doradb-bench/benches/bitmap_shift.rs index e1e024d..9b24dbd 100644 --- 
a/xngin-bench/benches/bitmap_shift.rs +++ b/doradb-bench/benches/bitmap_shift.rs @@ -1,6 +1,6 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use std::iter::FromIterator; -use xngin_storage::bitmap::Bitmap; +use doradb_storage::bitmap::Bitmap; fn bench_shift(c: &mut Criterion) { for log2_size in [12, 14] { diff --git a/xngin-bench/benches/codec_add.rs b/doradb-bench/benches/codec_add.rs similarity index 87% rename from xngin-bench/benches/codec_add.rs rename to doradb-bench/benches/codec_add.rs index a32a06e..8a67664 100644 --- a/xngin-bench/benches/codec_add.rs +++ b/doradb-bench/benches/codec_add.rs @@ -1,8 +1,8 @@ use criterion::{criterion_group, criterion_main, Criterion}; -use xngin_compute::BinaryEval; -use xngin_compute::arith::{Impl, AddI32, AddI64}; -use xngin_storage::attr::Attr; -use xngin_datatype::PreciseType; +use doradb_compute::BinaryEval; +use doradb_compute::arith::{Impl, AddI32, AddI64}; +use doradb_storage::attr::Attr; +use doradb_datatype::PreciseType; fn bench_codec(c: &mut Criterion) { for log2_size in [12, 14] { diff --git a/xngin-bench/benches/memcmp_sort.rs b/doradb-bench/benches/memcmp_sort.rs similarity index 96% rename from xngin-bench/benches/memcmp_sort.rs rename to doradb-bench/benches/memcmp_sort.rs index 68797e3..3fc6ea5 100644 --- a/xngin-bench/benches/memcmp_sort.rs +++ b/doradb-bench/benches/memcmp_sort.rs @@ -1,6 +1,6 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rand::Rng; -use xngin_datatype::memcmp::MemCmpFormat; +use doradb_datatype::memcmp::MemCmpFormat; fn bench_sort(c: &mut Criterion) { let mut group = c.benchmark_group("memsort"); diff --git a/xngin-bench/examples/codec_add.rs b/doradb-bench/examples/codec_add.rs similarity index 83% rename from xngin-bench/examples/codec_add.rs rename to doradb-bench/examples/codec_add.rs index 189344e..d2eea5d 100644 --- a/xngin-bench/examples/codec_add.rs +++ b/doradb-bench/examples/codec_add.rs @@ -1,9 +1,9 @@ use 
criterion::{criterion_group, criterion_main, Criterion}; use pprof::criterion::{PProfProfiler, Output}; -use xngin_compute::BinaryEval; -use xngin_compute::arith::{Impl, AddI32}; -use xngin_storage::attr::Attr; -use xngin_datatype::PreciseType; +use doradb_compute::BinaryEval; +use doradb_compute::arith::{Impl, AddI32}; +use doradb_storage::attr::Attr; +use doradb_datatype::PreciseType; fn bench_add(c: &mut Criterion) { let size = 4096; diff --git a/xngin-bench/src/lib.rs b/doradb-bench/src/lib.rs similarity index 100% rename from xngin-bench/src/lib.rs rename to doradb-bench/src/lib.rs diff --git a/xngin-catalog/Cargo.toml b/doradb-catalog/Cargo.toml similarity index 66% rename from xngin-catalog/Cargo.toml rename to doradb-catalog/Cargo.toml index 13c025d..2450e18 100644 --- a/xngin-catalog/Cargo.toml +++ b/doradb-catalog/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "xngin-catalog" +name = "doradb-catalog" version = "0.1.0" edition = "2021" authors = ["jiangzhe "] @@ -7,10 +7,10 @@ license = "MIT OR Apache-2.0" description = "Catalog of X-Engine" keywords = ["database"] categories = ["database"] -repository = "https://github.com/jiangzhe/xngin/xngin-catalog/" +repository = "https://github.com/jiangzhe/doradb/doradb-catalog/" [dependencies] -xngin-datatype = { version = "0.1.0", path = "../xngin-datatype" } +doradb-datatype = { version = "0.1.0", path = "../doradb-datatype" } semistr = "0.1" bitflags = "1.3" indexmap = "1.7" diff --git a/xngin-catalog/src/error.rs b/doradb-catalog/src/error.rs similarity index 100% rename from xngin-catalog/src/error.rs rename to doradb-catalog/src/error.rs diff --git a/xngin-catalog/src/lib.rs b/doradb-catalog/src/lib.rs similarity index 78% rename from xngin-catalog/src/lib.rs rename to doradb-catalog/src/lib.rs index d6ea4f1..c20a228 100644 --- a/xngin-catalog/src/lib.rs +++ b/doradb-catalog/src/lib.rs @@ -4,9 +4,10 @@ pub mod mem_impl; use crate::error::Result; use bitflags::bitflags; use semistr::SemiStr; +use std::fmt; use 
std::hash::Hash; use std::marker::PhantomData; -use xngin_datatype::PreciseType; +use doradb_datatype::PreciseType; /// Catalog maintains metadata of all database objects. /// It could be shared between threads. @@ -40,14 +41,22 @@ pub trait Catalog: Send + Sync { fn exists_column(&self, table_id: &TableID, column_name: &str) -> bool; fn find_column_by_name(&self, table_id: &TableID, column_name: &str) -> Option; + + fn find_keys(&self, table_id: &TableID) -> Vec; } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct ObjectID { id: u32, _marker: PhantomData, } +impl fmt::Debug for ObjectID { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ObjectID").field("id", &self.id).finish() + } +} + impl ObjectID { /// Required to create object only within the catalog module. pub(crate) fn new(id: u32) -> Self { @@ -90,6 +99,11 @@ pub struct Table { pub name: SemiStr, } +pub enum Key { + PrimaryKey(Vec), + UniqueKey(Vec), +} + /// Table spec used in creating table #[derive(Debug, Clone, PartialEq, Eq)] pub struct TableSpec { @@ -115,7 +129,7 @@ pub struct Column { pub table_id: TableID, pub name: SemiStr, pub pty: PreciseType, - pub idx: u32, + pub idx: ColIndex, pub attr: ColumnAttr, } @@ -138,6 +152,33 @@ impl ColumnSpec { } } +/// ColIndex wraps u32 to be the index of column in current table/subquery. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct ColIndex(u32); + +impl ColIndex { + #[inline] + pub fn value(&self) -> u32 { + self.0 + } +} + +impl From for ColIndex { + fn from(src: u32) -> Self { + ColIndex(src) + } +} + +impl std::fmt::Display for ColIndex { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "c{}", self.0) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct TblCol(pub TableID, pub ColIndex); + bitflags! 
{ pub struct ColumnAttr: u8 { const PK = 0x01; // primary key diff --git a/xngin-catalog/src/mem_impl.rs b/doradb-catalog/src/mem_impl.rs similarity index 84% rename from xngin-catalog/src/mem_impl.rs rename to doradb-catalog/src/mem_impl.rs index 06850a9..fc02ac3 100644 --- a/xngin-catalog/src/mem_impl.rs +++ b/doradb-catalog/src/mem_impl.rs @@ -1,5 +1,8 @@ use crate::error::{Error, Result}; -use crate::{Catalog, Column, ColumnID, Schema, SchemaID, Table, TableID, TableSpec}; +use crate::{ + Catalog, ColIndex, Column, ColumnAttr, ColumnID, Key, Schema, SchemaID, Table, TableID, + TableSpec, +}; use indexmap::IndexMap; use parking_lot::RwLock; use semistr::SemiStr; @@ -188,7 +191,7 @@ impl Catalog for MemCatalog { name: c.name.clone(), pty: c.pty, attr: c.attr, - idx: i as u32, + idx: ColIndex::from(i as u32), }; columns.push(column); } @@ -222,4 +225,40 @@ impl Catalog for MemCatalog { } } } + + #[inline] + fn find_keys(&self, table_id: &TableID) -> Vec { + let inner = self.inner.read(); + if let Some(columns) = inner.table_columns.get(table_id).map(|twc| &twc.columns) { + let mut res = vec![]; + let pk: Vec = columns + .iter() + .filter_map(|c| { + if c.attr.contains(ColumnAttr::PK) { + Some(c.clone()) + } else { + None + } + }) + .collect(); + if !pk.is_empty() { + res.push(Key::PrimaryKey(pk)); + } + let uk: Vec = columns + .iter() + .filter_map(|c| { + if c.attr.contains(ColumnAttr::UK) { + Some(c.clone()) + } else { + None + } + }) + .collect(); + if !uk.is_empty() { + res.push(Key::UniqueKey(uk)); + } + return res; + } + vec![] + } } diff --git a/xngin-compute/Cargo.toml b/doradb-compute/Cargo.toml similarity index 53% rename from xngin-compute/Cargo.toml rename to doradb-compute/Cargo.toml index edbe20c..6707aec 100644 --- a/xngin-compute/Cargo.toml +++ b/doradb-compute/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "xngin-compute" +name = "doradb-compute" version = "0.1.0" edition = "2021" authors = ["jiangzhe "] @@ -7,15 +7,15 @@ license = "MIT OR 
Apache-2.0" description = "Computation module of X-Engine" keywords = ["database", "vectorization"] categories = ["database-implementations"] -repository = "https://github.com/jiangzhe/xngin/xngin-compute/" +repository = "https://github.com/jiangzhe/doradb/doradb-compute/" [dependencies] thiserror = "1.0" libc = "0.2.101" -xngin-catalog = { version = "0.1.0", path = "../xngin-catalog" } -xngin-datatype = { version = "0.1.0", path = "../xngin-datatype" } -xngin-storage = { version = "0.1.0", path = "../xngin-storage" } -xngin-expr = { version = "0.1.0", path = "../xngin-expr" } +doradb-catalog = { version = "0.1.0", path = "../doradb-catalog" } +doradb-datatype = { version = "0.1.0", path = "../doradb-datatype" } +doradb-storage = { version = "0.1.0", path = "../doradb-storage" } +doradb-expr = { version = "0.1.0", path = "../doradb-expr" } smallvec = {version = "1.8", features = ["union"]} [dev-dependencies] diff --git a/xngin-compute/src/arith.rs b/doradb-compute/src/arith.rs similarity index 98% rename from xngin-compute/src/arith.rs rename to doradb-compute/src/arith.rs index 3b765d2..5871063 100644 --- a/xngin-compute/src/arith.rs +++ b/doradb-compute/src/arith.rs @@ -1,11 +1,11 @@ use crate::error::{Error, Result}; use crate::BinaryEval; -use xngin_datatype::PreciseType; -use xngin_storage::array::Array; -use xngin_storage::attr::Attr; -use xngin_storage::codec::{Codec, Single}; -use xngin_storage::repr::ByteRepr; -use xngin_storage::sel::Sel; +use doradb_datatype::PreciseType; +use doradb_storage::array::Array; +use doradb_storage::attr::Attr; +use doradb_storage::codec::{Codec, Single}; +use doradb_storage::repr::ByteRepr; +use doradb_storage::sel::Sel; /// Kinds of arithmetic expression. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -279,7 +279,7 @@ impl_arith_eval_for_num!(AddI64, i64, i64, i64, +); #[cfg(test)] mod tests { use super::*; - use xngin_storage::codec::Single; + use doradb_storage::codec::Single; #[test] fn test_vec_eval_4096() { diff --git a/xngin-compute/src/cmp.rs b/doradb-compute/src/cmp.rs similarity index 98% rename from xngin-compute/src/cmp.rs rename to doradb-compute/src/cmp.rs index 0c1ab92..afb987b 100644 --- a/xngin-compute/src/cmp.rs +++ b/doradb-compute/src/cmp.rs @@ -1,12 +1,12 @@ use crate::error::{Error, Result}; use crate::BinaryEval; -use xngin_datatype::PreciseType; -use xngin_expr::PredFuncKind; -use xngin_storage::attr::Attr; -use xngin_storage::bitmap::Bitmap; -use xngin_storage::codec::{Codec, Single}; -use xngin_storage::repr::ByteRepr; -use xngin_storage::sel::Sel; +use doradb_datatype::PreciseType; +use doradb_expr::PredFuncKind; +use doradb_storage::attr::Attr; +use doradb_storage::bitmap::Bitmap; +use doradb_storage::codec::{Codec, Single}; +use doradb_storage::repr::ByteRepr; +use doradb_storage::sel::Sel; /// Kinds of comparison expression. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] diff --git a/xngin-compute/src/error.rs b/doradb-compute/src/error.rs similarity index 91% rename from xngin-compute/src/error.rs rename to doradb-compute/src/error.rs index d2ec787..c83f00f 100644 --- a/xngin-compute/src/error.rs +++ b/doradb-compute/src/error.rs @@ -1,5 +1,5 @@ use thiserror::Error; -use xngin_storage::error::Error as StorageError; +use doradb_storage::error::Error as StorageError; pub type Result = std::result::Result; @@ -26,7 +26,7 @@ pub enum Error { #[error("Invalid codec for selection")] InvalidCodecForSel, #[error("{0}")] - ExpressionError(#[from] xngin_expr::error::Error), + ExpressionError(#[from] doradb_expr::error::Error), } impl From for Error { diff --git a/xngin-compute/src/eval/builder.rs b/doradb-compute/src/eval/builder.rs similarity index 98% rename from xngin-compute/src/eval/builder.rs rename to doradb-compute/src/eval/builder.rs index 065bab6..76993d2 100644 --- a/xngin-compute/src/eval/builder.rs +++ b/doradb-compute/src/eval/builder.rs @@ -5,8 +5,9 @@ use crate::eval::{Eval, EvalPlan, EvalRef}; use crate::logic::LogicKind; use smallvec::{smallvec, SmallVec}; use std::collections::HashMap; -use xngin_datatype::PreciseType; -use xngin_expr::{ColIndex, DataSourceID, ExprKind, FuncKind, Pred, TypeInfer, TypeInferer}; +use doradb_catalog::ColIndex; +use doradb_datatype::PreciseType; +use doradb_expr::{DataSourceID, ExprKind, FuncKind, Pred, TypeInfer, TypeInferer}; #[derive(Debug)] pub(super) struct EvalBuilder<'a, T, I> { diff --git a/xngin-compute/src/eval/mod.rs b/doradb-compute/src/eval/mod.rs similarity index 96% rename from xngin-compute/src/eval/mod.rs rename to doradb-compute/src/eval/mod.rs index 844b58c..093a438 100644 --- a/xngin-compute/src/eval/mod.rs +++ b/doradb-compute/src/eval/mod.rs @@ -8,14 +8,14 @@ use crate::cmp::CmpKind; use crate::error::{Error, Result}; use crate::logic::LogicKind; use builder::EvalBuilder; -use xngin_datatype::{PreciseType, Typed}; -use 
xngin_expr::Const; -use xngin_storage::attr::Attr; -use xngin_storage::block::Block; -use xngin_storage::codec::Single; -use xngin_storage::sel::Sel; +use doradb_datatype::{PreciseType, Typed}; +use doradb_expr::Const; +use doradb_storage::attr::Attr; +use doradb_storage::block::Block; +use doradb_storage::codec::Single; +use doradb_storage::sel::Sel; -/// Eval is similar to [`xngin_expr::Expr`], but only for evaluation. +/// Eval is similar to [`doradb_expr::Expr`], but only for evaluation. /// It supports deterministic scalar expressions and is restricted to /// evaluate within single block. #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -194,10 +194,11 @@ impl EvalRef { #[cfg(test)] mod tests { use super::*; - use xngin_expr::util::{TypeFix, TypeInferer}; - use xngin_expr::{Col, ColIndex, ColKind, ExprKind, FuncKind, GlobalID, PredFuncKind, QueryID}; - use xngin_storage::attr::Attr; - use xngin_storage::block::Block; + use doradb_catalog::ColIndex; + use doradb_expr::util::{TypeFix, TypeInferer}; + use doradb_expr::{Col, ColKind, ExprKind, FuncKind, GlobalID, PredFuncKind, QueryID}; + use doradb_storage::attr::Attr; + use doradb_storage::block::Block; #[test] fn test_build_eval() { diff --git a/xngin-compute/src/eval/plan.rs b/doradb-compute/src/eval/plan.rs similarity index 96% rename from xngin-compute/src/eval/plan.rs rename to doradb-compute/src/eval/plan.rs index 7a0354f..e9eb876 100644 --- a/xngin-compute/src/eval/plan.rs +++ b/doradb-compute/src/eval/plan.rs @@ -2,11 +2,11 @@ use crate::error::{Error, Result}; use crate::eval::{Eval, EvalBuilder, EvalRef}; use std::mem; -use xngin_catalog::TableID; -use xngin_expr::{ColIndex, DataSourceID, ExprKind, QueryID, TypeInferer}; -use xngin_storage::attr::Attr; -use xngin_storage::block::Block; -use xngin_storage::sel::Sel; +use doradb_catalog::{ColIndex, TableID}; +use doradb_expr::{DataSourceID, ExprKind, QueryID, TypeInferer}; +use doradb_storage::attr::Attr; +use doradb_storage::block::Block; +use 
doradb_storage::sel::Sel; pub type TableEvalPlan = EvalPlan; pub type QueryEvalPlan = EvalPlan; diff --git a/xngin-compute/src/lib.rs b/doradb-compute/src/lib.rs similarity index 83% rename from xngin-compute/src/lib.rs rename to doradb-compute/src/lib.rs index 6d01172..976b916 100644 --- a/xngin-compute/src/lib.rs +++ b/doradb-compute/src/lib.rs @@ -5,9 +5,9 @@ pub mod eval; pub mod logic; use crate::error::Result; -use xngin_datatype::PreciseType; -use xngin_storage::attr::Attr; -use xngin_storage::sel::Sel; +use doradb_datatype::PreciseType; +use doradb_storage::attr::Attr; +use doradb_storage::sel::Sel; /// Evaluation of binary expression. pub trait BinaryEval { diff --git a/xngin-compute/src/logic.rs b/doradb-compute/src/logic.rs similarity index 99% rename from xngin-compute/src/logic.rs rename to doradb-compute/src/logic.rs index 4063cec..05b9701 100644 --- a/xngin-compute/src/logic.rs +++ b/doradb-compute/src/logic.rs @@ -1,10 +1,10 @@ use crate::error::{Error, Result}; use crate::{BinaryEval, UnaryEval}; -use xngin_datatype::PreciseType; -use xngin_storage::attr::Attr; -use xngin_storage::bitmap::Bitmap; -use xngin_storage::codec::{Codec, Single}; -use xngin_storage::sel::Sel; +use doradb_datatype::PreciseType; +use doradb_storage::attr::Attr; +use doradb_storage::bitmap::Bitmap; +use doradb_storage::codec::{Codec, Single}; +use doradb_storage::sel::Sel; /// Kinds of arithmetic expression. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] diff --git a/xngin-datatype/Cargo.toml b/doradb-datatype/Cargo.toml similarity index 78% rename from xngin-datatype/Cargo.toml rename to doradb-datatype/Cargo.toml index f59fc68..9e8182a 100644 --- a/xngin-datatype/Cargo.toml +++ b/doradb-datatype/Cargo.toml @@ -1,11 +1,11 @@ [package] -name = "xngin-datatype" +name = "doradb-datatype" version = "0.1.0" edition = "2021" authors = ["jiangzhe "] license = "MIT OR Apache-2.0" description = "Datatype module of X-Engine" -repository = "https://github.com/jiangzhe/xngin/xngin-datatype/" +repository = "https://github.com/jiangzhe/doradb/doradb-datatype/" [dependencies] time = { version = "0.3", features = ["parsing", "formatting"] } diff --git a/xngin-datatype/src/align.rs b/doradb-datatype/src/align.rs similarity index 100% rename from xngin-datatype/src/align.rs rename to doradb-datatype/src/align.rs diff --git a/xngin-datatype/src/error.rs b/doradb-datatype/src/error.rs similarity index 100% rename from xngin-datatype/src/error.rs rename to doradb-datatype/src/error.rs diff --git a/xngin-datatype/src/konst.rs b/doradb-datatype/src/konst.rs similarity index 100% rename from xngin-datatype/src/konst.rs rename to doradb-datatype/src/konst.rs diff --git a/xngin-datatype/src/lib.rs b/doradb-datatype/src/lib.rs similarity index 100% rename from xngin-datatype/src/lib.rs rename to doradb-datatype/src/lib.rs diff --git a/xngin-datatype/src/memcmp.rs b/doradb-datatype/src/memcmp.rs similarity index 100% rename from xngin-datatype/src/memcmp.rs rename to doradb-datatype/src/memcmp.rs diff --git a/xngin-expr/Cargo.toml b/doradb-expr/Cargo.toml similarity index 55% rename from xngin-expr/Cargo.toml rename to doradb-expr/Cargo.toml index d10d0e7..c947295 100644 --- a/xngin-expr/Cargo.toml +++ b/doradb-expr/Cargo.toml @@ -1,15 +1,15 @@ [package] -name = "xngin-expr" +name = "doradb-expr" version = "0.1.0" edition = "2021" authors = ["jiangzhe "] license = "MIT OR Apache-2.0" 
description = "Expression module of X-Engine" -repository = "https://github.com/jiangzhe/xngin/xngin-expr/" +repository = "https://github.com/jiangzhe/doradb/doradb-expr/" [dependencies] -xngin-datatype = { version = "0.1.0", path = "../xngin-datatype" } -xngin-catalog = { version = "0.1.0", path = "../xngin-catalog" } +doradb-datatype = { version = "0.1.0", path = "../doradb-datatype" } +doradb-catalog = { version = "0.1.0", path = "../doradb-catalog" } smallvec = {version = "1.8", features = ["union"]} thiserror = "1.0" semistr = "0.1" diff --git a/xngin-expr/src/controlflow.rs b/doradb-expr/src/controlflow.rs similarity index 100% rename from xngin-expr/src/controlflow.rs rename to doradb-expr/src/controlflow.rs diff --git a/xngin-expr/src/error.rs b/doradb-expr/src/error.rs similarity index 100% rename from xngin-expr/src/error.rs rename to doradb-expr/src/error.rs diff --git a/xngin-expr/src/expr.rs b/doradb-expr/src/expr.rs similarity index 69% rename from xngin-expr/src/expr.rs rename to doradb-expr/src/expr.rs index bea6a16..dcd12e7 100644 --- a/xngin-expr/src/expr.rs +++ b/doradb-expr/src/expr.rs @@ -1,22 +1,16 @@ use crate::controlflow::ControlFlow; use crate::func::FuncKind; -use crate::id::{ColIndex, GlobalID, QueryID}; +use crate::id::{GlobalID, QueryID}; use crate::pred::{Pred, PredFuncKind}; use semistr::SemiStr; use smallvec::{smallvec, SmallVec}; use std::collections::HashSet; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::sync::Arc; -use xngin_catalog::TableID; -pub use xngin_datatype::{Const, ValidF64}; -use xngin_datatype::{Date, Datetime, Decimal, Interval, PreciseType, Time, TimeUnit}; - -// #[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] -// pub struct Expr { -// pub kind: ExprKind, -// pub ty: PreciseType, -// } +use doradb_catalog::{ColIndex, TableID, TblCol}; +pub use doradb_datatype::{Const, ValidF64}; +use doradb_datatype::{Date, Datetime, Decimal, Interval, PreciseType, Time, TimeUnit}; #[derive(Debug, Clone, 
PartialEq, Eq, Hash)] pub enum ExprKind { @@ -58,6 +52,13 @@ pub enum ExprKind { Plhd(Plhd), /// Predefined function argument. Farg(Farg), + /// Functional dependency of table. + /// If table has three columns c1, c2, c3. + /// and c1 is primary key or unique key, + /// c2 and c3 have functional dependency of c1, which + /// means c1 value can uniquely determine c2 + /// or c3 value. + FnlDep(FnlDep), } impl ExprKind { @@ -282,6 +283,15 @@ impl ExprKind { }) } + #[inline] + pub fn setop_col(gid: GlobalID, col_idx: ColIndex, left: ExprKind, right: ExprKind) -> Self { + ExprKind::Col(Col { + gid, + kind: ColKind::Setop(Box::new([left, right])), + idx: col_idx, + }) + } + #[inline] pub fn func(kind: FuncKind, args: Vec) -> Self { debug_assert!({ @@ -345,6 +355,14 @@ impl ExprKind { ExprKind::Subq(kind, qry_id) } + #[inline] + pub fn tbl_fnl_dep(table_id: TableID, col_idx: ColIndex, keys: Vec) -> Self { + ExprKind::FnlDep(FnlDep { + tbl_col: TblCol(table_id, col_idx), + keys: keys.into_boxed_slice(), + }) + } + #[inline] pub fn n_args(&self) -> usize { match &self { @@ -365,6 +383,7 @@ impl ExprKind { Pred::Exists(_) | Pred::NotExists(_) => 1, }, ExprKind::Tuple(es) => es.len(), + ExprKind::FnlDep(FnlDep { keys: args, .. }) => args.len(), } } @@ -374,13 +393,17 @@ impl ExprKind { pub fn args(&self) -> SmallVec<[&ExprKind; 2]> { match &self { ExprKind::Const(_) - | ExprKind::Col(..) | ExprKind::Plhd(_) | ExprKind::Subq(..) | ExprKind::Farg(_) | ExprKind::Attval(_) => { smallvec![] } + ExprKind::Col(Col { + kind: ColKind::Setop(args), + .. + }) => args.iter().collect(), + ExprKind::Col(Col { .. }) => smallvec![], ExprKind::Aggf { arg, .. } | ExprKind::Cast { arg, .. } => smallvec![arg.as_ref()], ExprKind::Func { args, .. 
} => args.iter().collect(), ExprKind::Case { op, acts, fallback } => { @@ -401,7 +424,8 @@ impl ExprKind { } Pred::Exists(subq) | Pred::NotExists(subq) => smallvec![subq.as_ref()], }, - ExprKind::Tuple(es) => SmallVec::from_iter(es.iter()), + ExprKind::Tuple(es) => es.iter().collect(), + ExprKind::FnlDep(FnlDep { keys: args, .. }) => args.iter().collect(), } } @@ -439,152 +463,17 @@ impl ExprKind { } Pred::Exists(subq) | Pred::NotExists(subq) => smallvec![subq.as_mut()], }, - ExprKind::Tuple(es) => SmallVec::from_iter(es.iter_mut()), - } - } - - pub fn walk<'a, V: ExprVisitor<'a>>( - &'a self, - visitor: &mut V, - ) -> ControlFlow { - let mut eff = visitor.enter(self)?; - for c in self.args() { - eff.merge(c.walk(visitor)?) - } - eff.merge(visitor.leave(self)?); - ControlFlow::Continue(eff) - } - - pub fn walk_mut( - &mut self, - visitor: &mut V, - ) -> ControlFlow { - let mut eff = visitor.enter(self)?; - for c in self.args_mut() { - eff.merge(c.walk_mut(visitor)?) - } - eff.merge(visitor.leave(self)?); - ControlFlow::Continue(eff) - } - - #[inline] - pub fn collect_non_aggr_cols(&self) -> (Vec, bool) { - let mut cols = vec![]; - let has_aggr = self.collect_non_aggr_cols_into(&mut cols); - (cols, has_aggr) - } - - /// Collect non-aggr columns and returns true if aggr function exists - #[inline] - pub fn collect_non_aggr_cols_into(&self, cols: &mut Vec) -> bool { - struct Collect<'a> { - aggr_lvl: usize, - has_aggr: bool, - cols: &'a mut Vec, - } - impl<'a> ExprVisitor<'a> for Collect<'_> { - type Cont = (); - type Break = (); - #[inline] - fn enter(&mut self, e: &ExprKind) -> ControlFlow<()> { - match e { - ExprKind::Aggf { .. } => { - self.aggr_lvl += 1; - self.has_aggr = true - } - ExprKind::Col(col) => { - if self.aggr_lvl == 0 { - self.cols.push(col.clone()) - } - } - _ => (), - } - ControlFlow::Continue(()) - } - - #[inline] - fn leave(&mut self, e: &ExprKind) -> ControlFlow<()> { - if let ExprKind::Aggf { .. 
} = e { - self.aggr_lvl -= 1 - } - ControlFlow::Continue(()) - } - } - let mut c = Collect { - aggr_lvl: 0, - cols, - has_aggr: false, - }; - let _ = self.walk(&mut c); - c.has_aggr - } - - #[inline] - pub fn contains_aggr_func(&self) -> bool { - struct Contains(bool); - impl<'a> ExprVisitor<'a> for Contains { - type Cont = (); - type Break = (); - #[inline] - fn enter(&mut self, e: &ExprKind) -> ControlFlow<()> { - if let ExprKind::Aggf { .. } = e { - self.0 = true; - return ControlFlow::Break(()); - } - ControlFlow::Continue(()) - } + ExprKind::Tuple(es) => es.iter_mut().collect(), + ExprKind::FnlDep(FnlDep { keys: args, .. }) => args.iter_mut().collect(), } - let mut c = Contains(false); - let _ = self.walk(&mut c); - c.0 } #[inline] - pub fn contains_non_aggr_cols(&self) -> bool { - struct Contains { - aggr_lvl: usize, - has_non_aggr_cols: bool, - } - - impl<'a> ExprVisitor<'a> for Contains { - type Cont = (); - type Break = (); - #[inline] - fn enter(&mut self, e: &ExprKind) -> ControlFlow<()> { - match e { - ExprKind::Aggf { .. } => self.aggr_lvl += 1, - ExprKind::Col(_) => { - if self.aggr_lvl == 0 { - self.has_non_aggr_cols = true; - return ControlFlow::Break(()); - } - } - _ => (), - } - ControlFlow::Continue(()) - } - - #[inline] - fn leave(&mut self, e: &ExprKind) -> ControlFlow<()> { - if let ExprKind::Aggf { .. } = e { - self.aggr_lvl -= 1 - } - ControlFlow::Continue(()) - } + pub fn col_gid(&self) -> Option { + match self { + ExprKind::Col(Col { gid, .. 
}) => Some(*gid), + _ => None, } - - let mut c = Contains { - aggr_lvl: 0, - has_non_aggr_cols: false, - }; - let _ = self.walk(&mut c); - c.has_non_aggr_cols - } - - #[inline] - pub fn collect_qry_ids(&self, hs: &mut HashSet) { - let mut c = CollectQryIDs(hs); - let _ = self.walk(&mut c); } } @@ -594,19 +483,20 @@ impl Default for ExprKind { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Col { pub gid: GlobalID, pub kind: ColKind, pub idx: ColIndex, } -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum ColKind { // table id and column name Table(TableID, SemiStr, PreciseType), Query(QueryID), Correlated(QueryID), + Setop(Box<[ExprKind; 2]>), /// Intra column. Used to chain output of operator nodes. /// For example, we may have aggregation expression "sum(c1)+1" /// in SELECT list. @@ -619,6 +509,16 @@ pub enum ColKind { Intra(u8), } +impl ColKind { + #[inline] + pub fn qry_id(&self) -> Option { + match self { + ColKind::Query(qid) | ColKind::Correlated(qid) => Some(*qid), + _ => None, + } + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Setq { All, @@ -668,63 +568,10 @@ pub enum Farg { TimeUnit(TimeUnit), } -pub trait Effect: Default { - fn merge(&mut self, other: Self); -} - -impl Effect for () { - #[inline] - fn merge(&mut self, _other: Self) {} -} - -pub trait ExprVisitor<'a>: Sized { - type Cont: Effect; - type Break; - /// Returns true if continue - #[inline] - fn enter(&mut self, _e: &'a ExprKind) -> ControlFlow { - ControlFlow::Continue(Self::Cont::default()) - } - - /// Returns true if continue - #[inline] - fn leave(&mut self, _e: &'a ExprKind) -> ControlFlow { - ControlFlow::Continue(Self::Cont::default()) - } -} - -pub trait ExprMutVisitor { - type Cont: Effect; - type Break; - /// Returns true if continue - #[inline] - fn enter(&mut self, _e: &mut ExprKind) -> ControlFlow { - 
ControlFlow::Continue(Self::Cont::default()) - } - - /// Returns true if continue - #[inline] - fn leave(&mut self, _e: &mut ExprKind) -> ControlFlow { - ControlFlow::Continue(Self::Cont::default()) - } -} - -pub struct CollectQryIDs<'a>(pub &'a mut HashSet); - -impl<'a> ExprVisitor<'a> for CollectQryIDs<'_> { - type Cont = (); - type Break = (); - #[inline] - fn leave(&mut self, e: &ExprKind) -> ControlFlow<()> { - if let ExprKind::Col(Col { - kind: ColKind::Query(qry_id), - .. - }) = &e - { - self.0.insert(*qry_id); - } - ControlFlow::Continue(()) - } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FnlDep { + pub tbl_col: TblCol, + pub keys: Box<[ExprKind]>, } #[cfg(test)] diff --git a/doradb-expr/src/expr_ext.rs b/doradb-expr/src/expr_ext.rs new file mode 100644 index 0000000..ab347ac --- /dev/null +++ b/doradb-expr/src/expr_ext.rs @@ -0,0 +1,234 @@ +use crate::expr::{ExprKind, Col, ColKind}; +use crate::id::QueryID; +use std::collections::HashSet; +use std::ops::ControlFlow; + +pub trait Effect: Default { + fn merge(&mut self, other: Self); +} + +impl Effect for () { + #[inline] + fn merge(&mut self, _other: Self) {} +} + +pub trait ExprVisitor<'a>: Sized { + type Cont: Effect; + type Break; + /// Returns true if continue + #[inline] + fn enter(&mut self, _e: &'a ExprKind) -> ControlFlow { + ControlFlow::Continue(Self::Cont::default()) + } + + /// Returns true if continue + #[inline] + fn leave(&mut self, _e: &'a ExprKind) -> ControlFlow { + ControlFlow::Continue(Self::Cont::default()) + } +} + +pub trait ExprMutVisitor { + type Cont: Effect; + type Break; + /// Returns true if continue + #[inline] + fn enter(&mut self, _e: &mut ExprKind) -> ControlFlow { + ControlFlow::Continue(Self::Cont::default()) + } + + /// Returns true if continue + #[inline] + fn leave(&mut self, _e: &mut ExprKind) -> ControlFlow { + ControlFlow::Continue(Self::Cont::default()) + } +} + +pub struct CollectQryIDs<'a>(pub &'a mut HashSet); + +impl<'a> ExprVisitor<'a> for 
CollectQryIDs<'_> { + type Cont = (); + type Break = (); + #[inline] + fn leave(&mut self, e: &ExprKind) -> ControlFlow<()> { + if let ExprKind::Col(Col { + kind: ColKind::Query(qry_id), + .. + }) = &e + { + self.0.insert(*qry_id); + } + ControlFlow::Continue(()) + } +} + +/// Extended opeartions on ExprKind. +pub trait ExprExt { + fn walk<'a, V: ExprVisitor<'a>>( + &'a self, + visitor: &mut V, + ) -> ControlFlow; + + fn walk_mut( + &mut self, + visitor: &mut V, + ) -> ControlFlow; + + fn collect_non_aggr_cols(&self) -> (Vec, bool); + + fn collect_non_aggr_cols_into(&self, cols: &mut Vec) -> bool; + + fn contains_aggr_func(&self) -> bool; + + fn contains_non_aggr_cols(&self) -> bool; + + fn collect_qry_ids(&self, hs: &mut HashSet); +} + +impl ExprExt for ExprKind { + + fn walk<'a, V: ExprVisitor<'a>>( + &'a self, + visitor: &mut V, + ) -> ControlFlow { + let mut eff = visitor.enter(self)?; + for c in self.args() { + eff.merge(c.walk(visitor)?) + } + eff.merge(visitor.leave(self)?); + ControlFlow::Continue(eff) + } + + fn walk_mut( + &mut self, + visitor: &mut V, + ) -> ControlFlow { + let mut eff = visitor.enter(self)?; + for c in self.args_mut() { + eff.merge(c.walk_mut(visitor)?) + } + eff.merge(visitor.leave(self)?); + ControlFlow::Continue(eff) + } + + // collect non-aggr columns and whether any aggr columns found. + #[inline] + fn collect_non_aggr_cols(&self) -> (Vec, bool) { + let mut cols = vec![]; + let has_aggr = self.collect_non_aggr_cols_into(&mut cols); + (cols, has_aggr) + } + + /// Collect non-aggr columns and returns true if aggr function exists + #[inline] + fn collect_non_aggr_cols_into(&self, cols: &mut Vec) -> bool { + struct Collect<'a> { + aggr_lvl: usize, + has_aggr: bool, + cols: &'a mut Vec, + } + impl<'a> ExprVisitor<'a> for Collect<'_> { + type Cont = (); + type Break = (); + #[inline] + fn enter(&mut self, e: &ExprKind) -> ControlFlow<()> { + match e { + ExprKind::Aggf { .. 
} => { + self.aggr_lvl += 1; + self.has_aggr = true + } + ExprKind::Col(col) => { + if self.aggr_lvl == 0 { + self.cols.push(col.clone()) + } + } + _ => (), + } + ControlFlow::Continue(()) + } + + #[inline] + fn leave(&mut self, e: &ExprKind) -> ControlFlow<()> { + if let ExprKind::Aggf { .. } = e { + self.aggr_lvl -= 1 + } + ControlFlow::Continue(()) + } + } + let mut c = Collect { + aggr_lvl: 0, + cols, + has_aggr: false, + }; + let _ = self.walk(&mut c); + c.has_aggr + } + + #[inline] + fn contains_aggr_func(&self) -> bool { + struct Contains(bool); + impl<'a> ExprVisitor<'a> for Contains { + type Cont = (); + type Break = (); + #[inline] + fn enter(&mut self, e: &ExprKind) -> ControlFlow<()> { + if let ExprKind::Aggf { .. } = e { + self.0 = true; + return ControlFlow::Break(()); + } + ControlFlow::Continue(()) + } + } + let mut c = Contains(false); + let _ = self.walk(&mut c); + c.0 + } + + #[inline] + fn contains_non_aggr_cols(&self) -> bool { + struct Contains { + aggr_lvl: usize, + has_non_aggr_cols: bool, + } + + impl<'a> ExprVisitor<'a> for Contains { + type Cont = (); + type Break = (); + #[inline] + fn enter(&mut self, e: &ExprKind) -> ControlFlow<()> { + match e { + ExprKind::Aggf { .. } => self.aggr_lvl += 1, + ExprKind::Col(_) => { + if self.aggr_lvl == 0 { + self.has_non_aggr_cols = true; + return ControlFlow::Break(()); + } + } + _ => (), + } + ControlFlow::Continue(()) + } + + #[inline] + fn leave(&mut self, e: &ExprKind) -> ControlFlow<()> { + if let ExprKind::Aggf { .. 
} = e { + self.aggr_lvl -= 1 + } + ControlFlow::Continue(()) + } + } + + let mut c = Contains { + aggr_lvl: 0, + has_non_aggr_cols: false, + }; + let _ = self.walk(&mut c); + c.has_non_aggr_cols + } + + #[inline] + fn collect_qry_ids(&self, hs: &mut HashSet) { + let mut c = CollectQryIDs(hs); + let _ = self.walk(&mut c); + } +} \ No newline at end of file diff --git a/xngin-expr/src/fold/add.rs b/doradb-expr/src/fold/add.rs similarity index 99% rename from xngin-expr/src/fold/add.rs rename to doradb-expr/src/fold/add.rs index 22bcd70..5a718c9 100644 --- a/xngin-expr/src/fold/add.rs +++ b/doradb-expr/src/fold/add.rs @@ -1,6 +1,6 @@ use crate::error::{Error, Result}; use crate::{Const, ExprKind}; -use xngin_datatype::Decimal; +use doradb_datatype::Decimal; #[inline] pub fn fold_add(lhs: &ExprKind, rhs: &ExprKind) -> Result> { diff --git a/xngin-expr/src/fold/cmp.rs b/doradb-expr/src/fold/cmp.rs similarity index 99% rename from xngin-expr/src/fold/cmp.rs rename to doradb-expr/src/fold/cmp.rs index ffd00cd..2f94f48 100644 --- a/xngin-expr/src/fold/cmp.rs +++ b/doradb-expr/src/fold/cmp.rs @@ -1,7 +1,7 @@ use crate::error::Result; use crate::{Const, ExprKind}; use std::cmp::Ordering; -use xngin_datatype::AlignPartialOrd; +use doradb_datatype::AlignPartialOrd; macro_rules! 
impl_fold_cmp { ( $fn:ident, $fnc:ident, $($e:pat),* ) => { diff --git a/xngin-expr/src/fold/mod.rs b/doradb-expr/src/fold/mod.rs similarity index 97% rename from xngin-expr/src/fold/mod.rs rename to doradb-expr/src/fold/mod.rs index 04f14f4..1871a86 100644 --- a/xngin-expr/src/fold/mod.rs +++ b/doradb-expr/src/fold/mod.rs @@ -7,6 +7,7 @@ mod sub; use crate::controlflow::{ControlFlow, Unbranch}; use crate::error::{Error, Result}; use crate::{Const, ExprKind, ExprMutVisitor, FuncKind, Pred, PredFuncKind}; +use crate::expr_ext::ExprExt; pub use add::*; pub use cmp::*; @@ -103,7 +104,7 @@ pub(crate) mod tests { } pub(crate) fn new_decimal(s: &str) -> Const { - let d: xngin_datatype::Decimal = s.parse().unwrap(); + let d: doradb_datatype::Decimal = s.parse().unwrap(); Decimal(d) } diff --git a/xngin-expr/src/fold/neg.rs b/doradb-expr/src/fold/neg.rs similarity index 99% rename from xngin-expr/src/fold/neg.rs rename to doradb-expr/src/fold/neg.rs index cd4f9a5..70d3161 100644 --- a/xngin-expr/src/fold/neg.rs +++ b/doradb-expr/src/fold/neg.rs @@ -1,6 +1,6 @@ use crate::error::Result; use crate::{Const, ExprKind}; -use xngin_datatype::Decimal; +use doradb_datatype::Decimal; #[inline] pub fn fold_neg(arg: &ExprKind) -> Result> { diff --git a/xngin-expr/src/fold/not.rs b/doradb-expr/src/fold/not.rs similarity index 100% rename from xngin-expr/src/fold/not.rs rename to doradb-expr/src/fold/not.rs diff --git a/xngin-expr/src/fold/sub.rs b/doradb-expr/src/fold/sub.rs similarity index 99% rename from xngin-expr/src/fold/sub.rs rename to doradb-expr/src/fold/sub.rs index 7cc723d..600486c 100644 --- a/xngin-expr/src/fold/sub.rs +++ b/doradb-expr/src/fold/sub.rs @@ -1,6 +1,6 @@ use crate::error::{Error, Result}; use crate::{Const, ExprKind}; -use xngin_datatype::Decimal; +use doradb_datatype::Decimal; #[inline] pub fn fold_sub(lhs: &ExprKind, rhs: &ExprKind) -> Result> { diff --git a/xngin-expr/src/func.rs b/doradb-expr/src/func.rs similarity index 100% rename from 
xngin-expr/src/func.rs rename to doradb-expr/src/func.rs diff --git a/xngin-expr/src/id.rs b/doradb-expr/src/id.rs similarity index 65% rename from xngin-expr/src/id.rs rename to doradb-expr/src/id.rs index c19b9bd..53c2283 100644 --- a/xngin-expr/src/id.rs +++ b/doradb-expr/src/id.rs @@ -1,4 +1,5 @@ use std::ops::Deref; +use doradb_catalog::ColIndex; /// ColIndex wraps u32 to be the index of column in current table/subquery. #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] @@ -11,11 +12,17 @@ impl GlobalID { GlobalID(self.0 + 1) } + // #[inline] + // pub fn fetch_inc(&mut self) -> Self { + // let val = self.0; + // self.0 += 1; + // GlobalID(val) + // } + #[inline] - pub fn fetch_inc(&mut self) -> Self { - let val = self.0; + pub fn inc_fetch(&mut self) -> Self { self.0 += 1; - GlobalID(val) + GlobalID(self.0) } #[inline] @@ -66,30 +73,7 @@ impl Deref for QueryID { } } -pub const INVALID_QUERY_ID: QueryID = QueryID(!0); - -/// ColIndex wraps u32 to be the index of column in current table/subquery. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct ColIndex(u32); - -impl ColIndex { - #[inline] - pub fn value(&self) -> u32 { - self.0 - } -} - -impl From for ColIndex { - fn from(src: u32) -> Self { - ColIndex(src) - } -} - -impl std::fmt::Display for ColIndex { - #[inline] - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "c{}", self.0) - } -} +pub const INVALID_QUERY_ID: QueryID = QueryID(0); -pub type QueryCol = (QueryID, ColIndex); +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct QryCol(pub QueryID, pub ColIndex); diff --git a/xngin-expr/src/lib.rs b/doradb-expr/src/lib.rs similarity index 84% rename from xngin-expr/src/lib.rs rename to doradb-expr/src/lib.rs index b6cd50a..074c100 100644 --- a/xngin-expr/src/lib.rs +++ b/doradb-expr/src/lib.rs @@ -1,6 +1,7 @@ pub mod controlflow; pub mod error; pub mod expr; +pub mod expr_ext; pub mod fold; pub mod func; pub mod id; @@ -8,7 +9,11 @@ pub mod pred; pub mod source; pub mod util; +// re-export column index +pub use doradb_catalog::ColIndex; + pub use crate::expr::*; +pub use crate::expr_ext::*; pub use crate::func::*; pub use crate::id::*; pub use crate::pred::*; diff --git a/xngin-expr/src/pred.rs b/doradb-expr/src/pred.rs similarity index 100% rename from xngin-expr/src/pred.rs rename to doradb-expr/src/pred.rs diff --git a/xngin-expr/src/source.rs b/doradb-expr/src/source.rs similarity index 93% rename from xngin-expr/src/source.rs rename to doradb-expr/src/source.rs index 5f966b5..aff30b2 100644 --- a/xngin-expr/src/source.rs +++ b/doradb-expr/src/source.rs @@ -1,7 +1,7 @@ use crate::expr::{Col, ColKind, ExprKind}; -use crate::id::{ColIndex, QueryID}; +use crate::id::QueryID; use std::hash::Hash; -use xngin_catalog::TableID; +use doradb_catalog::{ColIndex, TableID}; pub trait DataSourceID: Clone + Copy + PartialEq + Eq + Hash + PartialOrd + Ord + Sized { /// resolve data source from expression diff --git a/xngin-expr/src/util.rs 
b/doradb-expr/src/util.rs similarity index 99% rename from xngin-expr/src/util.rs rename to doradb-expr/src/util.rs index 889b547..e8beae0 100644 --- a/xngin-expr/src/util.rs +++ b/doradb-expr/src/util.rs @@ -1,11 +1,11 @@ use crate::controlflow::{Branch, ControlFlow, Unbranch}; use crate::error::{Error, Result}; -use crate::id::{ColIndex, QueryID}; use crate::{Col, ColKind, ExprKind, ExprMutVisitor, FuncKind, Pred, PredFuncKind}; use std::cmp::Ordering; use std::collections::HashMap; use std::mem; -use xngin_datatype::{Collation, PreciseType, Typed}; +use doradb_catalog::ColIndex; +use doradb_datatype::{Collation, PreciseType, Typed}; use std::hash::{Hash, Hasher}; diff --git a/xngin-index/Cargo.toml b/doradb-index/Cargo.toml similarity index 82% rename from xngin-index/Cargo.toml rename to doradb-index/Cargo.toml index 38b9e6a..7380891 100644 --- a/xngin-index/Cargo.toml +++ b/doradb-index/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "xngin-index" +name = "doradb-index" version = "0.1.0" edition = "2021" authors = ["jiangzhe "] @@ -7,7 +7,7 @@ license = "MIT OR Apache-2.0" description = "Index module of X-Engine" keywords = ["index"] categories = ["database-implementations"] -repository = "https://github.com/jiangzhe/xngin/xngin-index/" +repository = "https://github.com/jiangzhe/doradb/doradb-index/" [dependencies] scopeguard = {version = "1.1", default-features = false} diff --git a/xngin-index/src/epoch/atomic/inline.rs b/doradb-index/src/epoch/atomic/inline.rs similarity index 100% rename from xngin-index/src/epoch/atomic/inline.rs rename to doradb-index/src/epoch/atomic/inline.rs diff --git a/xngin-index/src/epoch/atomic/mod.rs b/doradb-index/src/epoch/atomic/mod.rs similarity index 100% rename from xngin-index/src/epoch/atomic/mod.rs rename to doradb-index/src/epoch/atomic/mod.rs diff --git a/xngin-index/src/epoch/atomic/owned.rs b/doradb-index/src/epoch/atomic/owned.rs similarity index 100% rename from xngin-index/src/epoch/atomic/owned.rs rename to 
doradb-index/src/epoch/atomic/owned.rs diff --git a/xngin-index/src/epoch/atomic/shared.rs b/doradb-index/src/epoch/atomic/shared.rs similarity index 100% rename from xngin-index/src/epoch/atomic/shared.rs rename to doradb-index/src/epoch/atomic/shared.rs diff --git a/xngin-index/src/epoch/collector.rs b/doradb-index/src/epoch/collector.rs similarity index 100% rename from xngin-index/src/epoch/collector.rs rename to doradb-index/src/epoch/collector.rs diff --git a/xngin-index/src/epoch/guard.rs b/doradb-index/src/epoch/guard.rs similarity index 100% rename from xngin-index/src/epoch/guard.rs rename to doradb-index/src/epoch/guard.rs diff --git a/xngin-index/src/epoch/internal.rs b/doradb-index/src/epoch/internal.rs similarity index 100% rename from xngin-index/src/epoch/internal.rs rename to doradb-index/src/epoch/internal.rs diff --git a/xngin-index/src/epoch/list.rs b/doradb-index/src/epoch/list.rs similarity index 100% rename from xngin-index/src/epoch/list.rs rename to doradb-index/src/epoch/list.rs diff --git a/xngin-index/src/epoch/macros.rs b/doradb-index/src/epoch/macros.rs similarity index 100% rename from xngin-index/src/epoch/macros.rs rename to doradb-index/src/epoch/macros.rs diff --git a/xngin-index/src/epoch/mod.rs b/doradb-index/src/epoch/mod.rs similarity index 100% rename from xngin-index/src/epoch/mod.rs rename to doradb-index/src/epoch/mod.rs diff --git a/xngin-index/src/epoch/queue.rs b/doradb-index/src/epoch/queue.rs similarity index 100% rename from xngin-index/src/epoch/queue.rs rename to doradb-index/src/epoch/queue.rs diff --git a/xngin-index/src/hot/key.rs b/doradb-index/src/hot/key.rs similarity index 100% rename from xngin-index/src/hot/key.rs rename to doradb-index/src/hot/key.rs diff --git a/xngin-index/src/hot/mod.rs b/doradb-index/src/hot/mod.rs similarity index 100% rename from xngin-index/src/hot/mod.rs rename to doradb-index/src/hot/mod.rs diff --git a/xngin-index/src/hot/node.rs b/doradb-index/src/hot/node.rs similarity index 
100% rename from xngin-index/src/hot/node.rs rename to doradb-index/src/hot/node.rs diff --git a/xngin-index/src/hot/node_impl.rs b/doradb-index/src/hot/node_impl.rs similarity index 100% rename from xngin-index/src/hot/node_impl.rs rename to doradb-index/src/hot/node_impl.rs diff --git a/xngin-index/src/hot/partial_key.rs b/doradb-index/src/hot/partial_key.rs similarity index 100% rename from xngin-index/src/hot/partial_key.rs rename to doradb-index/src/hot/partial_key.rs diff --git a/xngin-index/src/hot/value.rs b/doradb-index/src/hot/value.rs similarity index 100% rename from xngin-index/src/hot/value.rs rename to doradb-index/src/hot/value.rs diff --git a/xngin-index/src/lib.rs b/doradb-index/src/lib.rs similarity index 100% rename from xngin-index/src/lib.rs rename to doradb-index/src/lib.rs diff --git a/doradb-plan/Cargo.toml b/doradb-plan/Cargo.toml new file mode 100644 index 0000000..39d52d1 --- /dev/null +++ b/doradb-plan/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "doradb-plan" +version = "0.1.0" +edition = "2021" +authors = ["jiangzhe "] +license = "MIT OR Apache-2.0" +description = "Planner module of X-Engine" +keywords = ["query", "planning", "optimization"] +categories = ["database-implementations"] +repository = "https://github.com/jiangzhe/doradb/doradb-plan/" + +[dependencies] +doradb-datatype = { version = "0.1.0", path = "../doradb-datatype" } +doradb-catalog = { version = "0.1.0", path = "../doradb-catalog" } +doradb-sql = { version = "0.1.0", path = "../doradb-sql" } +doradb-expr = { version = "0.1.0", path = "../doradb-expr" } +doradb-storage = { version = "0.1.0", path = "../doradb-storage" } +doradb-compute = { version = "0.1.0", path = "../doradb-compute" } +aosa = "0.1" +slab = "0.4" +thiserror = "1.0" +semistr = "0.1" +fnv = "1.0" +indexmap = "1.7" +static_init = "1.0" +smallvec = {version = "1.8", features = ["union"]} +bitflags = "1.3" \ No newline at end of file diff --git a/xngin-plan/src/digraph.rs b/doradb-plan/src/digraph.rs 
similarity index 100% rename from xngin-plan/src/digraph.rs rename to doradb-plan/src/digraph.rs diff --git a/xngin-plan/src/error.rs b/doradb-plan/src/error.rs similarity index 95% rename from xngin-plan/src/error.rs rename to doradb-plan/src/error.rs index 2783cff..594b0a1 100644 --- a/xngin-plan/src/error.rs +++ b/doradb-plan/src/error.rs @@ -1,7 +1,7 @@ use semistr::SemiStr; use thiserror::Error; -use xngin_expr::QueryID; -use xngin_sql::ast::Ident; +use doradb_expr::QueryID; +use doradb_sql::ast::Ident; pub type Result = std::result::Result; @@ -68,13 +68,13 @@ pub enum Error { #[error(transparent)] ParseIntError(#[from] std::num::ParseIntError), #[error(transparent)] - ParseDecimalError(#[from] xngin_datatype::DecimalError), + ParseDecimalError(#[from] doradb_datatype::DecimalError), #[error(transparent)] - ParseDatetimeError(#[from] xngin_datatype::DatetimeParseError), + ParseDatetimeError(#[from] doradb_datatype::DatetimeParseError), #[error("Internal error MustOK")] MustOK, #[error(transparent)] - ExprError(#[from] xngin_expr::error::Error), + ExprError(#[from] doradb_expr::error::Error), #[error("Too many tables to join")] TooManyTablesToJoin, #[error("Query {0} not found")] @@ -110,7 +110,7 @@ pub enum Error { #[error("Empty plan")] EmptyPlan, #[error(transparent)] - Compute(#[from] xngin_compute::error::Error), + Compute(#[from] doradb_compute::error::Error), #[error("Unsupported physical table scan")] UnsupportedPhyTableScan, } diff --git a/xngin-plan/src/explain.rs b/doradb-plan/src/explain.rs similarity index 93% rename from xngin-plan/src/explain.rs rename to doradb-plan/src/explain.rs index c36298a..21c386a 100644 --- a/xngin-plan/src/explain.rs +++ b/doradb-plan/src/explain.rs @@ -1,11 +1,9 @@ use crate::join::graph::Edge; use crate::join::{Join, JoinGraph, QualifiedJoin}; -use crate::lgc::{ - Aggr, Apply, LgcPlan, Op, OpKind, OpOutput, OpVisitor, QuerySet, Setop, SortItem, -}; +use crate::lgc::{Aggr, Apply, LgcPlan, Op, OpKind, OpVisitor, 
QuerySet, Setop, SortItem}; use std::fmt::{self, Write}; -use xngin_expr::controlflow::{Branch, ControlFlow, Unbranch}; -use xngin_expr::{AggKind, Col, ColKind, Const, ExprKind, Pred, QueryID, Setq}; +use doradb_expr::controlflow::{Branch, ControlFlow, Unbranch}; +use doradb_expr::{AggKind, Col, ColKind, Const, ExprKind, Pred, QueryID, Setq}; const INDENT: usize = 4; const BRANCH_1: char = '└'; @@ -80,13 +78,9 @@ impl Explain for Op { fn explain(&self, f: &mut F, conf: &ExplainConf) -> fmt::Result { match &self.kind { OpKind::Proj { cols, .. } => { - if let Some(cols) = cols { - f.write_str("Proj{")?; - write_refs(f, cols.iter().map(|c| &c.expr), ", ", conf)?; - f.write_str("}") - } else { - Err(fmt::Error) - } + f.write_str("Proj{")?; + write_refs(f, cols.iter().map(|c| &c.expr), ", ", conf)?; + f.write_str("}") } OpKind::Filt { pred, .. } => { f.write_str("Filt{")?; @@ -124,7 +118,13 @@ impl Explain for Op { OpKind::Scan(scan) => { write!(f, "Table{{name={},cols=[", scan.table)?; write_refs(f, scan.cols.iter().map(|c| &c.expr), ", ", conf)?; - f.write_str("]}}") + f.write_char(']')?; + if !scan.filt.is_empty() { + f.write_str(",filt=(")?; + write_refs(f, &scan.filt, " and ", conf)?; + f.write_char(')')?; + } + f.write_char('}') } OpKind::Query(_) => f.write_str("(subquery todo)"), OpKind::Empty => f.write_str("Empty"), @@ -245,13 +245,15 @@ impl Explain for Apply { } impl Explain for Setop { - fn explain(&self, f: &mut F, _conf: &ExplainConf) -> fmt::Result { + fn explain(&self, f: &mut F, conf: &ExplainConf) -> fmt::Result { f.write_str("Setop{")?; f.write_str(self.kind.to_lower())?; if self.q == Setq::All { f.write_str(" all")? 
} - f.write_char('}') + f.write_char('[')?; + write_refs(f, self.cols.iter().map(|pc| &pc.expr), ", ", conf)?; + f.write_str("]}") } } @@ -326,6 +328,7 @@ impl Explain for ExprKind { } ExprKind::Plhd(_) => write!(f, "(placeholder todo)"), ExprKind::Farg(_) => write!(f, "(funcarg todo)"), + ExprKind::FnlDep(_) => write!(f, "(fnldep todo)"), } } } @@ -385,6 +388,13 @@ impl Explain for Col { write!(f, "c#{}", self.gid.value()) } } + ColKind::Setop(..) => { + if conf.show_col_name { + write!(f, "[{}]#{}", self.idx.value(), self.gid.value()) + } else { + write!(f, "#{}", self.gid.value()) + } + } ColKind::Intra(_) => { if conf.show_col_name { write!(f, "i[{}]#{}", self.idx.value(), self.gid.value()) @@ -606,8 +616,8 @@ mod tests { use super::{Explain, ExplainConf}; use crate::lgc::tests::tpch_catalog; use crate::lgc::LgcPlan; - use xngin_sql::parser::dialect::MySQL; - use xngin_sql::parser::parse_query; + use doradb_sql::parser::dialect::MySQL; + use doradb_sql::parser::parse_query; #[test] fn test_explain_plan() { diff --git a/xngin-plan/src/join/estimate.rs b/doradb-plan/src/join/estimate.rs similarity index 100% rename from xngin-plan/src/join/estimate.rs rename to doradb-plan/src/join/estimate.rs diff --git a/xngin-plan/src/join/graph.rs b/doradb-plan/src/join/graph.rs similarity index 99% rename from xngin-plan/src/join/graph.rs rename to doradb-plan/src/join/graph.rs index 716f0b9..5e483fd 100644 --- a/xngin-plan/src/join/graph.rs +++ b/doradb-plan/src/join/graph.rs @@ -5,7 +5,7 @@ use indexmap::IndexMap; use smallvec::SmallVec; use std::collections::HashMap; use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, Deref, DerefMut}; -use xngin_expr::{ExprKind, QueryID}; +use doradb_expr::{ExprKind, QueryID}; // Support at most 31 tables in single join graph. 
// The threshold is actually very high for DP algorithm diff --git a/xngin-plan/src/join/mod.rs b/doradb-plan/src/join/mod.rs similarity index 98% rename from xngin-plan/src/join/mod.rs rename to doradb-plan/src/join/mod.rs index 2c54ebc..a6e961b 100644 --- a/xngin-plan/src/join/mod.rs +++ b/doradb-plan/src/join/mod.rs @@ -6,7 +6,7 @@ use crate::error::{Error, Result}; use crate::lgc::{Op, OpKind}; use std::collections::HashSet; use std::ops::{Deref, DerefMut}; -use xngin_expr::{ExprKind, GlobalID, QueryID}; +use doradb_expr::{ExprKind, QueryID}; // alias of join graph pub use graph::Graph as JoinGraph; @@ -90,7 +90,6 @@ pub struct QualifiedJoin { pub kind: JoinKind, pub left: JoinOp, pub right: JoinOp, - // pub cond: Expr, // Join condition pub cond: Vec, // Additional filter applied after join. diff --git a/xngin-plan/src/join/reorder/dphyp.rs b/doradb-plan/src/join/reorder/dphyp.rs similarity index 100% rename from xngin-plan/src/join/reorder/dphyp.rs rename to doradb-plan/src/join/reorder/dphyp.rs diff --git a/xngin-plan/src/join/reorder/dpsize.rs b/doradb-plan/src/join/reorder/dpsize.rs similarity index 100% rename from xngin-plan/src/join/reorder/dpsize.rs rename to doradb-plan/src/join/reorder/dpsize.rs diff --git a/xngin-plan/src/join/reorder/greedy.rs b/doradb-plan/src/join/reorder/greedy.rs similarity index 100% rename from xngin-plan/src/join/reorder/greedy.rs rename to doradb-plan/src/join/reorder/greedy.rs diff --git a/xngin-plan/src/join/reorder/mod.rs b/doradb-plan/src/join/reorder/mod.rs similarity index 99% rename from xngin-plan/src/join/reorder/mod.rs rename to doradb-plan/src/join/reorder/mod.rs index 89ad14f..dc8e2a4 100644 --- a/xngin-plan/src/join/reorder/mod.rs +++ b/doradb-plan/src/join/reorder/mod.rs @@ -9,8 +9,8 @@ use crate::lgc::{LgcPlan, Op, OpKind, OpMutVisitor, QuerySet}; use std::borrow::Cow; use std::collections::HashMap; use std::mem; -use xngin_expr::controlflow::{Branch, ControlFlow, Unbranch}; -use xngin_expr::QueryID; +use 
doradb_expr::controlflow::{Branch, ControlFlow, Unbranch}; +use doradb_expr::QueryID; // Export GOO algorithm pub use greedy::Goo; @@ -211,7 +211,7 @@ mod tests { table_map, }; use crate::rule::joingraph_initialize; - use xngin_catalog::TableID; + use doradb_catalog::TableID; #[test] fn test_join_reorder_sequential() { diff --git a/xngin-plan/src/lgc/alias.rs b/doradb-plan/src/lgc/alias.rs similarity index 98% rename from xngin-plan/src/lgc/alias.rs rename to doradb-plan/src/lgc/alias.rs index 934fb5f..fcac197 100644 --- a/xngin-plan/src/lgc/alias.rs +++ b/doradb-plan/src/lgc/alias.rs @@ -2,7 +2,7 @@ use crate::error::{Error, Result}; use indexmap::{map, IndexMap}; use semistr::SemiStr; use std::ops::{Deref, DerefMut}; -use xngin_expr::QueryID; +use doradb_expr::QueryID; /// QueryAliases stores the aliases of tables. /// MySQL allows subquery and table has same alias. diff --git a/xngin-plan/src/lgc/builder.rs b/doradb-plan/src/lgc/builder.rs similarity index 96% rename from xngin-plan/src/lgc/builder.rs rename to doradb-plan/src/lgc/builder.rs index b1d105c..a0405e1 100644 --- a/xngin-plan/src/lgc/builder.rs +++ b/doradb-plan/src/lgc/builder.rs @@ -10,13 +10,12 @@ use crate::lgc::setop::{SetopKind, SubqOp}; use crate::lgc::LgcPlan; use crate::rule::expr_simplify::{simplify_nested, NullCoalesce}; use semistr::SemiStr; -use xngin_catalog::{Catalog, SchemaID, TableID}; -use xngin_expr::controlflow::ControlFlow; -use xngin_expr::{ - self as expr, ColIndex, ExprKind, ExprMutVisitor, GlobalID, Plhd, PredFuncKind, QueryID, Setq, - SubqKind, +use doradb_catalog::{Catalog, ColIndex, SchemaID, TableID}; +use doradb_expr::controlflow::ControlFlow; +use doradb_expr::{ + self as expr, ExprKind, ExprExt, ExprMutVisitor, Plhd, PredFuncKind, QueryID, Setq, SubqKind, }; -use xngin_sql::ast::*; +use doradb_sql::ast::*; pub struct LgcBuilder<'a, C: Catalog> { catalog: &'a C, @@ -54,6 +53,7 @@ impl<'c, C: Catalog> LgcBuilder<'c, C> { qry_set: self.qs, root, attaches: self.attaches, + 
max_cid: colgen.curr_gid(), }) } @@ -516,11 +516,47 @@ impl<'c, C: Catalog> LgcBuilder<'c, C> { let (qry_id, _) = self.build_subquery(&None, &select_set.left, phc.allow_unknown_ident, colgen)?; let left = SubqOp::query(qry_id); - let (query_id, _) = + let left_out: Vec<_> = self + .qs + .get(&qry_id) + .unwrap() + .out_cols() + .iter() + .enumerate() + .map(|(i, pc)| { + ( + colgen.gen_qry_col(qry_id, ColIndex::from(i as u32)), + pc.alias.clone(), + ) + }) + .collect(); + let (qry_id, _) = self.build_subquery(&None, &select_set.right, phc.allow_unknown_ident, colgen)?; - let right = SubqOp::query(query_id); + let right = SubqOp::query(qry_id); + // let right_out = self.qs.get(&qry_id).unwrap().out_cols(); + let right_out: Vec<_> = self + .qs + .get(&qry_id) + .unwrap() + .out_cols() + .iter() + .enumerate() + .map(|(i, _)| colgen.gen_qry_col(qry_id, ColIndex::from(i as u32))) + .collect(); + if left_out.len() != right_out.len() { + return Err(Error::ColumnCountMismatch); + } + let cols: Vec<_> = left_out + .into_iter() + .zip(right_out) + .enumerate() + .map(|(i, ((l, alias), r))| { + let e = colgen.gen_setop_col(ColIndex::from(i as u32), l, r); + ProjCol::implicit_alias(e, alias) + }) + .collect(); Ok(( - self.gen_op(OpKind::setop(kind, q, left, right)), + self.gen_op(OpKind::setop(kind, q, left, right, cols)), Location::Intermediate, )) } @@ -924,17 +960,10 @@ impl<'c, C: Catalog> LgcBuilder<'c, C> { let (qry_id, subquery) = self.qs.insert_empty(); for c in all_cols { let idx = ColIndex::from(c.idx); - let col = colgen.gen_tbl_col(qry_id, table_id, idx, c.pty, c.name.clone()); + let col = colgen.gen_tbl_col(table_id, idx, c.pty, c.name.clone()); proj_cols.push(ProjCol::implicit_alias(col, c.name)) } - let scan = OpKind::table( - qry_id, - schema_id, - schema_name, - table_id, - table_name, - proj_cols, - ); + let scan = OpKind::table(schema_id, schema_name, table_id, table_name, proj_cols); subquery.root = Op::new(scan); // todo: currently we assume all tables 
are located on disk. subquery.location = Location::Disk; diff --git a/xngin-plan/src/lgc/col.rs b/doradb-plan/src/lgc/col.rs similarity index 76% rename from xngin-plan/src/lgc/col.rs rename to doradb-plan/src/lgc/col.rs index 4886ca6..958e11b 100644 --- a/xngin-plan/src/lgc/col.rs +++ b/doradb-plan/src/lgc/col.rs @@ -1,9 +1,9 @@ use semistr::SemiStr; use std::collections::hash_map::Entry; use std::collections::HashMap; -use xngin_catalog::TableID; -use xngin_datatype::PreciseType; -use xngin_expr::{ColIndex, ExprKind, GlobalID, QueryCol, QueryID}; +use doradb_catalog::TableID; +use doradb_datatype::PreciseType; +use doradb_expr::{ColIndex, ExprKind, GlobalID, QryCol, QueryID}; #[derive(Debug, Clone, PartialEq, Eq)] pub struct ProjCol { @@ -62,11 +62,16 @@ pub enum AliasKind { #[derive(Default)] pub struct ColGen { - cid: GlobalID, - qm: HashMap, + gid: GlobalID, + qm: HashMap, } impl ColGen { + #[inline] + pub fn curr_gid(&self) -> GlobalID { + self.gid + } + /// Generate query column, with global id. /// If the same column is found, reuse global id. #[inline] @@ -75,30 +80,36 @@ impl ColGen { ExprKind::query_col(gid, qry_id, idx) } + /// Generate setop column, with global id. + #[inline] + pub fn gen_setop_col(&mut self, idx: ColIndex, left: ExprKind, right: ExprKind) -> ExprKind { + let gid = self.gid.inc_fetch(); + ExprKind::setop_col(gid, idx, left, right) + } + /// Generate table column. /// If same table appears multiple times in one query, we assign different /// query id to each table, therefore global id will also be different. 
#[inline] pub fn gen_tbl_col( &mut self, - qry_id: QueryID, table_id: TableID, idx: ColIndex, ty: PreciseType, col_name: SemiStr, ) -> ExprKind { - let gid = self.find_or_inc_cid(qry_id, idx); + let gid = self.gid.inc_fetch(); ExprKind::table_col(gid, table_id, idx, ty, col_name) } #[inline] fn find_or_inc_cid(&mut self, qry_id: QueryID, idx: ColIndex) -> GlobalID { - match self.qm.entry((qry_id, idx)) { + match self.qm.entry(QryCol(qry_id, idx)) { Entry::Occupied(occ) => *occ.get(), Entry::Vacant(vac) => { - self.cid = self.cid.next(); - vac.insert(self.cid); // store new cid - self.cid + let gid = self.gid.inc_fetch(); + vac.insert(gid); // store new cid + gid } } } diff --git a/xngin-plan/src/lgc/mod.rs b/doradb-plan/src/lgc/mod.rs similarity index 96% rename from xngin-plan/src/lgc/mod.rs rename to doradb-plan/src/lgc/mod.rs index 7ba41c9..b43f421 100644 --- a/xngin-plan/src/lgc/mod.rs +++ b/doradb-plan/src/lgc/mod.rs @@ -15,10 +15,10 @@ pub use op::*; pub use query::{Location, QryIDs, QuerySet, Subquery}; pub use reflect::reflect; pub use setop::{Setop, SetopKind, SubqOp}; -use xngin_catalog::Catalog; -use xngin_expr::controlflow::ControlFlow; -use xngin_expr::QueryID; -use xngin_sql::ast::QueryExpr; +use doradb_catalog::Catalog; +use doradb_expr::controlflow::ControlFlow; +use doradb_expr::{GlobalID, QueryID}; +use doradb_sql::ast::QueryExpr; /// LgcPlan represents a self-contained logical plan with /// complete information about all its nodes. @@ -30,6 +30,9 @@ pub struct LgcPlan { /// Such queries are gathered and should be /// executed in parallel. pub attaches: Vec, + /// maximum global id of columns. + /// It is used to generate new columns in optimization phase. 
+ pub max_cid: GlobalID, } impl LgcPlan { @@ -93,11 +96,11 @@ pub(crate) mod tests { use super::*; use crate::join::{Join, JoinGraph, QualifiedJoin}; use std::collections::HashMap; - use xngin_catalog::mem_impl::MemCatalog; - use xngin_catalog::{Catalog, ColumnAttr, ColumnSpec, TableID, TableSpec}; - use xngin_datatype::{Collation, PreciseType}; - use xngin_sql::parser::dialect::MySQL; - use xngin_sql::parser::parse_query; + use doradb_catalog::mem_impl::MemCatalog; + use doradb_catalog::{Catalog, ColumnAttr, ColumnSpec, TableID, TableSpec}; + use doradb_datatype::{Collation, PreciseType}; + use doradb_sql::parser::dialect::MySQL; + use doradb_sql::parser::parse_query; macro_rules! plan_shape { ( $($ok:ident),* ) => { @@ -566,18 +569,14 @@ pub(crate) mod tests { cat.create_table(TableSpec::new( "j", "t0", - vec![ColumnSpec::new( - "c0", - PreciseType::i32(), - ColumnAttr::empty(), - )], + vec![ColumnSpec::new("c0", PreciseType::i32(), ColumnAttr::PK)], )) .unwrap(); cat.create_table(TableSpec::new( "j", "t1", vec![ - ColumnSpec::new("c0", PreciseType::i32(), ColumnAttr::empty()), + ColumnSpec::new("c0", PreciseType::i32(), ColumnAttr::PK), ColumnSpec::new("c1", PreciseType::i32(), ColumnAttr::empty()), ], )) @@ -586,7 +585,7 @@ pub(crate) mod tests { "j", "t2", vec![ - ColumnSpec::new("c0", PreciseType::i32(), ColumnAttr::empty()), + ColumnSpec::new("c0", PreciseType::i32(), ColumnAttr::PK), ColumnSpec::new("c1", PreciseType::i32(), ColumnAttr::empty()), ColumnSpec::new("c2", PreciseType::i32(), ColumnAttr::empty()), ], @@ -596,8 +595,8 @@ pub(crate) mod tests { "j", "t3", vec![ - ColumnSpec::new("c0", PreciseType::i32(), ColumnAttr::empty()), - ColumnSpec::new("c1", PreciseType::i32(), ColumnAttr::empty()), + ColumnSpec::new("c0", PreciseType::i32(), ColumnAttr::PK), + ColumnSpec::new("c1", PreciseType::i32(), ColumnAttr::PK), ColumnSpec::new("c2", PreciseType::i32(), ColumnAttr::empty()), ColumnSpec::new("c3", PreciseType::i32(), ColumnAttr::empty()), ], @@ 
-606,12 +605,25 @@ pub(crate) mod tests { cat.create_table(TableSpec::new( "j", "t4", + vec![ + ColumnSpec::new("c0", PreciseType::i32(), ColumnAttr::PK), + ColumnSpec::new("c1", PreciseType::i32(), ColumnAttr::UK), + ColumnSpec::new("c2", PreciseType::i32(), ColumnAttr::empty()), + ColumnSpec::new("c3", PreciseType::i32(), ColumnAttr::empty()), + ColumnSpec::new("c4", PreciseType::i32(), ColumnAttr::empty()), + ], + )) + .unwrap(); + cat.create_table(TableSpec::new( + "j", + "t5", vec![ ColumnSpec::new("c0", PreciseType::i32(), ColumnAttr::empty()), ColumnSpec::new("c1", PreciseType::i32(), ColumnAttr::empty()), ColumnSpec::new("c2", PreciseType::i32(), ColumnAttr::empty()), ColumnSpec::new("c3", PreciseType::i32(), ColumnAttr::empty()), ColumnSpec::new("c4", PreciseType::i32(), ColumnAttr::empty()), + ColumnSpec::new("c5", PreciseType::i32(), ColumnAttr::empty()), ], )) .unwrap(); @@ -733,13 +745,13 @@ pub(crate) mod tests { } } - pub(crate) fn get_subq_filt_expr(subq: &Subquery) -> Vec { + pub(crate) fn get_subq_filt_expr(subq: &Subquery) -> Vec { let mut cfe = CollectFiltExpr(vec![]); let _ = subq.root.walk(&mut cfe); cfe.0 } - pub(crate) fn get_table_filt_expr(subq: &Subquery) -> Vec { + pub(crate) fn get_table_filt_expr(subq: &Subquery) -> Vec { let mut cfe = CollectTableFiltExpr(vec![]); let _ = subq.root.walk(&mut cfe); cfe.0 @@ -802,7 +814,7 @@ pub(crate) mod tests { .unwrap() } - struct CollectTableFiltExpr(Vec); + struct CollectTableFiltExpr(Vec); impl OpVisitor for CollectTableFiltExpr { type Cont = (); @@ -819,7 +831,7 @@ pub(crate) mod tests { } } - struct CollectFiltExpr(Vec); + struct CollectFiltExpr(Vec); impl OpVisitor for CollectFiltExpr { type Cont = (); diff --git a/xngin-plan/src/lgc/op.rs b/doradb-plan/src/lgc/op.rs similarity index 90% rename from xngin-plan/src/lgc/op.rs rename to doradb-plan/src/lgc/op.rs index 69282c4..fba2e56 100644 --- a/xngin-plan/src/lgc/op.rs +++ b/doradb-plan/src/lgc/op.rs @@ -14,15 +14,13 @@ use 
crate::lgc::setop::{Setop, SetopKind, SubqOp}; use semistr::SemiStr; use smallvec::{smallvec, SmallVec}; use std::collections::HashSet; -use std::sync::Arc; -use xngin_catalog::{SchemaID, TableID}; -use xngin_expr::controlflow::ControlFlow; -use xngin_expr::{Effect, ExprKind, GlobalID, QueryID, Setq, INVALID_GLOBAL_ID}; +use doradb_catalog::{SchemaID, TableID}; +use doradb_expr::controlflow::ControlFlow; +use doradb_expr::{Effect, ExprKind, GlobalID, QueryID, Setq, INVALID_GLOBAL_ID}; #[derive(Debug, Clone, Default)] pub struct Op { pub id: GlobalID, - // pub output: OpOutput, pub kind: OpKind, } @@ -31,7 +29,6 @@ impl Op { pub fn new(kind: OpKind) -> Self { Op { id: INVALID_GLOBAL_ID, - // output: OpOutput::default(), kind, } } @@ -40,7 +37,6 @@ impl Op { pub fn empty() -> Self { Op { id: INVALID_GLOBAL_ID, - // output: OpOutput::default(), kind: OpKind::Empty, } } @@ -67,14 +63,14 @@ impl Op { loop { match &op.kind { OpKind::Aggr(aggr) => return Some(&aggr.proj), - OpKind::Proj { cols, .. } => return cols.as_ref().map(|cs| &cs[..]), + OpKind::Proj { cols, .. } => return Some(&cols[..]), OpKind::Row(row) => return row.as_ref().map(|cs| &cs[..]), OpKind::Sort { input, .. } => op = input.as_ref(), OpKind::Limit { input, .. } => op = input.as_ref(), OpKind::Filt { input, .. } => op = input.as_ref(), OpKind::Scan(scan) => return Some(&scan.cols), + OpKind::Setop(setop) => return Some(&setop.cols), OpKind::Query(_) - | OpKind::Setop(_) | OpKind::Join(_) | OpKind::JoinGraph(_) | OpKind::Attach(..) @@ -90,14 +86,14 @@ impl Op { loop { match &mut op.kind { OpKind::Aggr(aggr) => return Some(&mut aggr.proj), - OpKind::Proj { cols, .. } => return cols.as_mut(), + OpKind::Proj { cols, .. } => return Some(cols.as_mut()), OpKind::Row(row) => return row.as_mut(), OpKind::Sort { input, .. } => op = input.as_mut(), OpKind::Limit { input, .. } => op = input.as_mut(), OpKind::Filt { input, .. 
} => op = input.as_mut(), + OpKind::Setop(setop) => return Some(&mut setop.cols), OpKind::Scan(..) | OpKind::Query(_) - | OpKind::Setop(_) | OpKind::Join(_) | OpKind::JoinGraph(_) | OpKind::Attach(..) @@ -143,23 +139,6 @@ impl Op { } } -#[derive(Debug, Clone, Default)] -pub enum OpOutput { - #[default] - Unspecified, - Ref(Arc>), -} - -impl OpOutput { - #[inline] - pub fn extract(&self) -> Option> { - match self { - OpOutput::Unspecified => None, - OpOutput::Ref(cols) => Some(cols.as_ref().clone()), - } - } -} - #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum OpTy { Proj, @@ -187,10 +166,7 @@ pub enum OpKind { /// It's equivalent to "SELECT ..." clause in SQL statement. /// After plan optimization, this kind of node will be removed /// as we apply projection on each other nodes' output. - Proj { - cols: Option>, - input: Box, - }, + Proj { cols: Vec, input: Box }, /// Filter node. /// /// It's equivalent to "WHERE ..." clause or "HAVING ..." clause in @@ -261,7 +237,6 @@ impl OpKind { #[inline] pub fn table( - qry: QueryID, schema_id: SchemaID, schema: SemiStr, table_id: TableID, @@ -273,7 +248,6 @@ impl OpKind { schema, table_id, table, - qry, cols, filt: vec![], })) @@ -282,7 +256,7 @@ impl OpKind { #[inline] pub fn proj(cols: Vec, input: Op) -> Self { OpKind::Proj { - cols: Some(cols), + cols, input: Box::new(input), } } @@ -329,12 +303,19 @@ impl OpKind { } #[inline] - pub fn setop(kind: SetopKind, q: Setq, left: SubqOp, right: SubqOp) -> Self { + pub fn setop( + kind: SetopKind, + q: Setq, + left: SubqOp, + right: SubqOp, + cols: Vec, + ) -> Self { OpKind::Setop(Box::new(Setop { kind, q, left, right, + cols, })) } @@ -432,13 +413,7 @@ impl OpKind { #[inline] pub fn exprs(&self) -> SmallVec<[&ExprKind; 2]> { match self { - OpKind::Proj { cols, .. } => { - if let Some(cols) = cols { - cols.iter().map(|c| &c.expr).collect() - } else { - smallvec![] - } - } + OpKind::Proj { cols, .. } => cols.iter().map(|c| &c.expr).collect(), OpKind::Filt { pred, .. 
} => pred.iter().collect(), OpKind::Aggr(aggr) => aggr .groups @@ -453,11 +428,8 @@ impl OpKind { .map(|c| &c.expr) .chain(scan.filt.iter()) .collect(), - OpKind::Limit { .. } - | OpKind::Query(_) - | OpKind::Setop(_) - | OpKind::Empty - | OpKind::Attach(..) => { + OpKind::Setop(setop) => setop.cols.iter().map(|c| &c.expr).collect(), + OpKind::Limit { .. } | OpKind::Query(_) | OpKind::Empty | OpKind::Attach(..) => { smallvec![] } OpKind::Join(j) => match j.as_ref() { @@ -480,13 +452,7 @@ impl OpKind { #[inline] pub fn exprs_mut(&mut self) -> SmallVec<[&mut ExprKind; 2]> { match self { - OpKind::Proj { cols, .. } => { - if let Some(cols) = cols { - cols.iter_mut().map(|c| &mut c.expr).collect() - } else { - smallvec![] - } - } + OpKind::Proj { cols, .. } => cols.iter_mut().map(|c| &mut c.expr).collect(), OpKind::Filt { pred, .. } => pred.iter_mut().collect(), OpKind::Aggr(aggr) => aggr .groups @@ -501,11 +467,8 @@ impl OpKind { .map(|c| &mut c.expr) .chain(scan.filt.iter_mut()) .collect(), - OpKind::Limit { .. } - | OpKind::Query(_) - | OpKind::Setop(_) - | OpKind::Empty - | OpKind::Attach(..) => { + OpKind::Setop(setop) => setop.cols.iter_mut().map(|c| &mut c.expr).collect(), + OpKind::Limit { .. } | OpKind::Query(_) | OpKind::Empty | OpKind::Attach(..) => { smallvec![] } OpKind::Join(j) => match j.as_mut() { @@ -583,7 +546,7 @@ pub struct TableScan { pub schema: SemiStr, pub table_id: TableID, pub table: SemiStr, - pub qry: QueryID, + // pub qry: QueryID, pub cols: Vec, // filter expression that can be pushed down to scan. 
pub filt: Vec, diff --git a/xngin-plan/src/lgc/query.rs b/doradb-plan/src/lgc/query.rs similarity index 93% rename from xngin-plan/src/lgc/query.rs rename to doradb-plan/src/lgc/query.rs index a4b3cfd..fd161b7 100644 --- a/xngin-plan/src/lgc/query.rs +++ b/doradb-plan/src/lgc/query.rs @@ -8,8 +8,8 @@ use std::collections::HashSet; use std::mem; use std::ops::ControlFlow; use std::ops::Deref; -use xngin_catalog::{SchemaID, TableID}; -use xngin_expr::QueryID; +use doradb_catalog::{SchemaID, TableID}; +use doradb_expr::{GlobalID, QueryID, INVALID_QUERY_ID}; /// Query wraps logical operator with additional syntax information. /// group operators as a tree, with output column list. @@ -128,14 +128,22 @@ pub enum QryIDs { } /// QuerySet stores all sub-subqeries and provide lookup and update methods. -#[derive(Debug, Default)] +/// QueryID should start from 1, so initialize query set with one empty subquery. +#[derive(Debug)] pub struct QuerySet(Vec); +impl Default for QuerySet { + #[inline] + fn default() -> Self { + QuerySet(vec![Subquery::empty()]) + } +} + impl QuerySet { #[inline] pub fn insert_empty(&mut self) -> (QueryID, &mut Subquery) { - // let query = Subquery::default(); let qry_id = self.0.len(); + assert_ne!(qry_id, INVALID_QUERY_ID.value() as usize); self.0.push(Subquery::empty()); (QueryID::from(qry_id as u32), &mut self.0[qry_id]) } @@ -193,6 +201,12 @@ impl QuerySet { } } + /// Returns root operator id of given query. 
+ #[inline] + pub fn qry_root_id(&self, qry_id: &QueryID) -> Option { + self.get(qry_id).map(|subq| subq.root.id) + } + #[inline] fn upsert_query(&mut self, mut sq: Subquery) -> QueryID { let mut mapping = FnvHashMap::default(); diff --git a/xngin-plan/src/lgc/reflect.rs b/doradb-plan/src/lgc/reflect.rs similarity index 99% rename from xngin-plan/src/lgc/reflect.rs rename to doradb-plan/src/lgc/reflect.rs index 66d9193..40c500c 100644 --- a/xngin-plan/src/lgc/reflect.rs +++ b/doradb-plan/src/lgc/reflect.rs @@ -1,16 +1,16 @@ use crate::error::{Error, Result}; use crate::join::{self, Join, JoinKind, JoinOp}; use crate::lgc::col::{AliasKind, ProjCol}; -use crate::lgc::op::{Op, OpKind, OpOutput, SortItem}; +use crate::lgc::op::{Op, OpKind, SortItem}; use crate::lgc::query::QuerySet; use crate::lgc::LgcPlan; use aosa::StringArena; use std::collections::hash_map::Entry; use std::collections::HashMap; -use xngin_catalog::{Catalog, SchemaID, TableID}; -use xngin_datatype::{self as datatype, TimeUnit, DEFAULT_DATE_FORMAT}; -use xngin_expr::{self as expr, ColKind, Const, ExprKind, Farg, FuncKind, PredFuncKind, QueryID}; -use xngin_sql::ast::*; +use doradb_catalog::Catalog; +use doradb_datatype::{self as datatype, TimeUnit, DEFAULT_DATE_FORMAT}; +use doradb_expr::{self as expr, ColKind, Const, ExprKind, Farg, FuncKind, PredFuncKind, QueryID}; +use doradb_sql::ast::*; #[inline] pub fn reflect<'a, C: Catalog>( @@ -102,11 +102,6 @@ fn reflect_op<'a, C: Catalog>( let res = match &op.kind { OpKind::Proj { cols, input } => { let child = reflect_op(ctx, arena, qs, root, input, catalog)?; - let cols = if let Some(cols) = cols { - &cols[..] - } else { - return Err(Error::InvalidPlanStructureForReflection); - }; reflect_proj(ctx, arena, cols, qs, child, catalog)? } OpKind::Filt { pred, input } => { @@ -1057,6 +1052,7 @@ fn transform_expr<'a, C: Catalog>( let col_alias = Ident::regular(arena.add(col.alias.as_str())?); Expr::ColumnRef(vec![tbl_alias, col_alias]) } + ColKind::Setop(..) 
=> unreachable!("setop columns are not transformable"), }, ExprKind::Pred(pred) => transform_pred(ctx, arena, qs, pred, catalog)?, ExprKind::Aggf { kind, q, arg } => transform_aggf(ctx, arena, qs, *kind, *q, arg, catalog)?, @@ -1433,6 +1429,8 @@ fn transform_const<'a>( let value = arena.add(&value)?; Literal::Interval(Interval { unit, value }) } + Const::Bool(b) => Literal::Bool(*b), + Const::Null => Literal::Null, _ => todo!(), }; Ok(res) @@ -1590,7 +1588,7 @@ fn next_avail_tbl_alias_id<'a>( mod tests { use super::*; use crate::lgc::tests::{build_plan, j_catalog}; - use xngin_sql::pretty::{PrettyConf, PrettyFormat}; + use doradb_sql::pretty::{PrettyConf, PrettyFormat}; #[test] fn test_reflect_queries() { diff --git a/xngin-plan/src/lgc/resolv.rs b/doradb-plan/src/lgc/resolv.rs similarity index 99% rename from xngin-plan/src/lgc/resolv.rs rename to doradb-plan/src/lgc/resolv.rs index 1a0ec07..ebc2fb8 100644 --- a/xngin-plan/src/lgc/resolv.rs +++ b/doradb-plan/src/lgc/resolv.rs @@ -3,12 +3,12 @@ use crate::lgc::col::{ColGen, ProjCol}; use crate::lgc::query::Subquery; use semistr::SemiStr; use std::sync::Arc; -use xngin_catalog::Catalog; -use xngin_datatype::{Date, Decimal, TimeUnit, DEFAULT_DATE_FORMAT}; -use xngin_expr::{ +use doradb_catalog::Catalog; +use doradb_datatype::{Date, Decimal, TimeUnit, DEFAULT_DATE_FORMAT}; +use doradb_expr::{ self as expr, ColIndex, ExprKind, Farg, FuncKind, Pred, PredFuncKind, QueryID, SubqKind, }; -use xngin_sql::ast::*; +use doradb_sql::ast::*; #[derive(Debug)] pub enum Resolution { diff --git a/xngin-plan/src/lgc/scope.rs b/doradb-plan/src/lgc/scope.rs similarity index 97% rename from xngin-plan/src/lgc/scope.rs rename to doradb-plan/src/lgc/scope.rs index 30ccfe0..1189c47 100644 --- a/xngin-plan/src/lgc/scope.rs +++ b/doradb-plan/src/lgc/scope.rs @@ -4,7 +4,7 @@ use indexmap::IndexMap; use semistr::SemiStr; use std::collections::HashMap; use std::ops::{Deref, DerefMut}; -use xngin_expr::{ColIndex, ExprKind, GlobalID, QueryID}; 
+use doradb_expr::{ColIndex, ExprKind, GlobalID, QueryID}; // Scopes is stack-like environment for query blocks. #[derive(Debug, Default)] diff --git a/xngin-plan/src/lgc/setop.rs b/doradb-plan/src/lgc/setop.rs similarity index 95% rename from xngin-plan/src/lgc/setop.rs rename to doradb-plan/src/lgc/setop.rs index 1d1669e..5eccae6 100644 --- a/xngin-plan/src/lgc/setop.rs +++ b/doradb-plan/src/lgc/setop.rs @@ -1,7 +1,8 @@ use crate::error::{Error, Result}; use crate::lgc::op::{Op, OpKind}; +use crate::lgc::ProjCol; use std::ops::{Deref, DerefMut}; -use xngin_expr::{GlobalID, QueryID, Setq}; +use doradb_expr::{QueryID, Setq}; #[derive(Debug, Clone)] pub struct Setop { @@ -10,6 +11,7 @@ pub struct Setop { /// Sources of Setop are always subqueries. pub left: SubqOp, pub right: SubqOp, + pub cols: Vec, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/xngin-plan/src/lib.rs b/doradb-plan/src/lib.rs similarity index 100% rename from xngin-plan/src/lib.rs rename to doradb-plan/src/lib.rs diff --git a/xngin-plan/src/phy/builder.rs b/doradb-plan/src/phy/builder.rs similarity index 97% rename from xngin-plan/src/phy/builder.rs rename to doradb-plan/src/phy/builder.rs index 963b918..d931f5e 100644 --- a/xngin-plan/src/phy/builder.rs +++ b/doradb-plan/src/phy/builder.rs @@ -2,8 +2,8 @@ use super::*; use crate::digraph::{DiGraph, NodeIndex}; use crate::error::{Error, Result}; use crate::lgc::{Location, Op, OpKind, QuerySet, Subquery}; -use xngin_compute::eval::{QueryEvalPlan, TableEvalPlan}; -use xngin_expr::TypeInferer; +use doradb_compute::eval::{QueryEvalPlan, TableEvalPlan}; +use doradb_expr::TypeInferer; pub struct PhyBuilder<'a, I> { graph: DiGraph, diff --git a/xngin-plan/src/phy/mod.rs b/doradb-plan/src/phy/mod.rs similarity index 88% rename from xngin-plan/src/phy/mod.rs rename to doradb-plan/src/phy/mod.rs index 62fa080..ff155fa 100644 --- a/xngin-plan/src/phy/mod.rs +++ b/doradb-plan/src/phy/mod.rs @@ -4,8 +4,8 @@ use crate::digraph::{DiGraph, NodeIndex}; 
use crate::error::{Error, Result}; use crate::lgc::LgcPlan; use builder::PhyBuilder; -use xngin_catalog::TableID; -use xngin_compute::eval::{QueryEvalPlan, TableEvalPlan}; +use doradb_catalog::TableID; +use doradb_compute::eval::{QueryEvalPlan, TableEvalPlan}; /// PhyPlan is a directed graph transformed from LgcPlan. /// One node can have multiple downstreams, e.g CTE node. @@ -53,9 +53,9 @@ pub struct PhyTableScan { mod tests { use super::*; use crate::lgc::LgcPlan; - use xngin_catalog::mem_impl::MemCatalog; - use xngin_catalog::{Catalog, ColumnAttr, ColumnSpec, TableSpec}; - use xngin_sql::parser::{dialect, parse_query_verbose}; + use doradb_catalog::mem_impl::MemCatalog; + use doradb_catalog::{Catalog, ColumnAttr, ColumnSpec, TableSpec}; + use doradb_sql::parser::{dialect, parse_query_verbose}; // #[test] // fn test_build_phy() { diff --git a/xngin-plan/src/rule/col_prune.rs b/doradb-plan/src/rule/col_prune.rs similarity index 98% rename from xngin-plan/src/rule/col_prune.rs rename to doradb-plan/src/rule/col_prune.rs index 3aef121..8ca4347 100644 --- a/xngin-plan/src/rule/col_prune.rs +++ b/doradb-plan/src/rule/col_prune.rs @@ -4,8 +4,8 @@ use crate::rule::RuleEffect; use fnv::FnvHashMap; use std::collections::BTreeMap; use std::mem; -use xngin_expr::controlflow::{Branch, ControlFlow, Unbranch}; -use xngin_expr::{Col, ColIndex, ColKind, ExprKind, ExprMutVisitor, ExprVisitor, QueryID}; +use doradb_expr::controlflow::{Branch, ControlFlow, Unbranch}; +use doradb_expr::{Col, ColIndex, ColKind, ExprKind, ExprExt, ExprMutVisitor, ExprVisitor, QueryID}; /// Column pruning will remove unnecessary columns from the given plan. /// It is invoked top down. First collect all output columns from current @@ -147,7 +147,7 @@ impl ExprMutVisitor for Modify<'_> { fn modify_subq(subq: &mut Subquery, mapping: Option<&BTreeMap>) { match &mut subq.root.kind { OpKind::Proj { cols, .. 
} => { - *cols = Some(retain(cols.take().unwrap(), mapping)); + *cols = retain(mem::take(cols), mapping); } OpKind::Aggr(aggr) => { aggr.proj = retain(mem::take(&mut aggr.proj), mapping); diff --git a/xngin-plan/src/rule/derived_unfold.rs b/doradb-plan/src/rule/derived_unfold.rs similarity index 96% rename from xngin-plan/src/rule/derived_unfold.rs rename to doradb-plan/src/rule/derived_unfold.rs index 5783175..16a6f12 100644 --- a/xngin-plan/src/rule/derived_unfold.rs +++ b/doradb-plan/src/rule/derived_unfold.rs @@ -6,8 +6,8 @@ use crate::lgc::{ use crate::rule::RuleEffect; use std::collections::HashMap; use std::mem; -use xngin_expr::controlflow::{Branch, ControlFlow, Unbranch}; -use xngin_expr::{Col, ColIndex, ColKind, ExprKind, ExprMutVisitor, QueryCol, QueryID}; +use doradb_expr::controlflow::{Branch, ControlFlow, Unbranch}; +use doradb_expr::{Col, ColIndex, ColKind, ExprKind, ExprExt, ExprMutVisitor, QryCol, QueryID}; /// Unfold derived table. /// @@ -28,7 +28,7 @@ pub fn derived_unfold(qry_set: &mut QuerySet, qry_id: QueryID) -> Result, + mapping: &mut HashMap, mode: Mode, ) -> Result { qry_set.transform_op(qry_id, |qry_set, loc, op| { @@ -53,7 +53,7 @@ struct Unfold<'a> { qry_set: &'a mut QuerySet, stack: Vec, // map query column with position to inner expression. 
- mapping: &'a mut HashMap, + mapping: &'a mut HashMap, mode: Mode, } @@ -61,7 +61,7 @@ impl<'a> Unfold<'a> { #[inline] fn new( qry_set: &'a mut QuerySet, - mapping: &'a mut HashMap, + mapping: &'a mut HashMap, mode: Mode, ) -> Self { Unfold { @@ -164,7 +164,7 @@ impl OpMutVisitor for Unfold<'_> { // the mapping between original columns to unfolded expressions for (idx, c) in out_cols.into_iter().enumerate() { self.mapping - .insert((*qry_id, ColIndex::from(idx as u32)), c.expr); + .insert(QryCol(*qry_id, ColIndex::from(idx as u32)), c.expr); } *op = new_op; ControlFlow::Continue(RuleEffect::OPEXPR) @@ -324,14 +324,14 @@ fn extract(op: &mut Op) -> (Op, Vec) { Op { kind: OpKind::Proj { cols, input }, .. - } => (*input, cols.unwrap()), + } => (*input, cols), _ => unreachable!(), } } #[inline] -fn rewrite_exprs(op: &mut Op, mapping: &HashMap) -> RuleEffect { - struct Rewrite<'a>(&'a HashMap); +fn rewrite_exprs(op: &mut Op, mapping: &HashMap) -> RuleEffect { + struct Rewrite<'a>(&'a HashMap); impl ExprMutVisitor for Rewrite<'_> { type Cont = RuleEffect; type Break = (); @@ -343,7 +343,8 @@ fn rewrite_exprs(op: &mut Op, mapping: &HashMap) -> RuleEffe .. 
}) = e { - if let Some(new) = self.0.get(&(*qry_id, *idx)) { + let qry_col = QryCol(*qry_id, *idx); + if let Some(new) = self.0.get(&qry_col) { *e = new.clone(); return ControlFlow::Continue(RuleEffect::EXPR); } diff --git a/xngin-plan/src/rule/expr_simplify.rs b/doradb-plan/src/rule/expr_simplify.rs similarity index 98% rename from xngin-plan/src/rule/expr_simplify.rs rename to doradb-plan/src/rule/expr_simplify.rs index e327451..e568764 100644 --- a/xngin-plan/src/rule/expr_simplify.rs +++ b/doradb-plan/src/rule/expr_simplify.rs @@ -5,12 +5,12 @@ use crate::rule::RuleEffect; use indexmap::{IndexMap, IndexSet}; use std::cmp::Ordering; use std::mem; -use xngin_datatype::AlignPartialOrd; -use xngin_expr::controlflow::{Branch, ControlFlow, Unbranch}; -use xngin_expr::fold::*; -use xngin_expr::{ - Col, ColIndex, ColKind, Const, ExprKind, ExprMutVisitor, FuncKind, GlobalID, Pred, - PredFuncKind, QueryCol, QueryID, +use doradb_datatype::AlignPartialOrd; +use doradb_expr::controlflow::{Branch, ControlFlow, Unbranch}; +use doradb_expr::fold::*; +use doradb_expr::{ + Col, ColIndex, ColKind, Const, ExprKind, ExprExt, ExprMutVisitor, FuncKind, GlobalID, Pred, + PredFuncKind, QryCol, QueryID, }; #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -157,7 +157,7 @@ pub(crate) fn normalize_single(e: &mut ExprKind) { [ExprKind::Col(c1), ExprKind::Col(c2)] => { // if e1 and e2 are columns, fixed the order so that // "e1 cmp e2" always has InternalOrder(e1) < InternalOrder(e2) - if c1 > c2 { + if c1.gid > c2.gid { mem::swap(c1, c2); *kind = flipped_kind; } @@ -512,7 +512,7 @@ fn simplify_conj_short_circuit( eff: &mut RuleEffect, ) -> Option { let mut eset = IndexSet::new(); - let mut cmps: IndexMap = IndexMap::new(); + let mut cmps: IndexMap = IndexMap::new(); for e in es.drain(..) 
{ match &e { ExprKind::Const(c) => { @@ -554,7 +554,7 @@ fn simplify_conj_short_circuit( }), ExprKind::Const(new_c)], ) => { let ent = - cmps.entry((*qry_id, *idx)) + cmps.entry(QryCol(*qry_id, *idx)) .or_insert_with(|| QueryColPredicates { gid: *gid, preds: vec![], @@ -586,7 +586,7 @@ fn simplify_conj_short_circuit( } } } - for ((qid, idx), mut pes) in cmps { + for (QryCol(qid, idx), mut pes) in cmps { if pes.preds.len() == 1 { // keep it as is let (kind, c) = pes.preds.pop().unwrap(); @@ -2269,13 +2269,19 @@ mod tests { ); } - fn assert_eq_filt_expr(s1: &str, mut q1: LgcPlan, _s2: &str, q2: LgcPlan) { + fn assert_eq_filt_expr(s1: &str, mut q1: LgcPlan, s2: &str, q2: LgcPlan) { + use crate::lgc::reflect; + use aosa::StringArena; expr_simplify(&mut q1.qry_set, q1.root).unwrap(); print_plan(s1, &q1); - assert_eq!( - get_subq_filt_expr(q1.root_query().unwrap()), - get_subq_filt_expr(q2.root_query().unwrap()) - ); + print_plan(s2, &q2); + // as we add GlobalID to column, it's not safe to compare expression. + // instead we convert expression to AST and then compare. 
+ let cat = j_catalog(); + let arena = StringArena::with_capacity(1024); + let stmt1 = reflect(&q1, &arena, &cat).unwrap(); + let stmt2 = reflect(&q1, &arena, &cat).unwrap(); + assert_eq!(stmt1, stmt2); } fn assert_no_filt_expr(s1: &str, mut q1: LgcPlan) { diff --git a/xngin-plan/src/rule/fix.rs b/doradb-plan/src/rule/fix.rs similarity index 98% rename from xngin-plan/src/rule/fix.rs rename to doradb-plan/src/rule/fix.rs index bb56191..c4fafd3 100644 --- a/xngin-plan/src/rule/fix.rs +++ b/doradb-plan/src/rule/fix.rs @@ -4,10 +4,10 @@ use crate::lgc::{Aggr, Op, OpOutput, OpKind, OpMutVisitor, QuerySet, ProjCol}; use std::collections::HashMap; use std::sync::Arc; use std::mem; -use xngin_datatype::PreciseType; -use xngin_expr::controlflow::{Branch, ControlFlow, Unbranch}; -use xngin_expr::util::{fix_bools, TypeFix}; -use xngin_expr::{GlobalID, Col, ColKind, ColIndex, Expr, ExprKind, QueryID}; +use doradb_datatype::PreciseType; +use doradb_expr::controlflow::{Branch, ControlFlow, Unbranch}; +use doradb_expr::util::{fix_bools, TypeFix}; +use doradb_expr::{GlobalID, Col, ColKind, ColIndex, Expr, ExprKind, QueryID}; /// Fix types of all expressions and generate intra columns for execution. /// In initialization of logical plan, all type info, except table columns, are left as empty. @@ -249,8 +249,8 @@ mod tests { use super::*; use crate::lgc::tests::{build_plan, get_filt_expr, print_plan}; use crate::lgc::LgcPlan; - use xngin_catalog::mem_impl::MemCatalog; - use xngin_catalog::{Catalog, ColumnAttr, ColumnSpec, TableSpec}; + use doradb_catalog::mem_impl::MemCatalog; + use doradb_catalog::{Catalog, ColumnAttr, ColumnSpec, TableSpec}; macro_rules! 
vec2 { ($e:expr) => { @@ -260,7 +260,7 @@ mod tests { #[test] fn test_type_fix_add_sub() { - use xngin_datatype::PreciseType as PT; + use doradb_datatype::PreciseType as PT; let cat = ty_catalog(); for (sql, tys) in vec![ // constants @@ -367,7 +367,7 @@ mod tests { #[test] fn test_type_fix_cmp() { - use xngin_datatype::PreciseType as PT; + use doradb_datatype::PreciseType as PT; let cat = ty_catalog(); for (sql, tys) in vec![ // float diff --git a/xngin-plan/src/rule/joingraph_initialize.rs b/doradb-plan/src/rule/joingraph_initialize.rs similarity index 99% rename from xngin-plan/src/rule/joingraph_initialize.rs rename to doradb-plan/src/rule/joingraph_initialize.rs index a976e2e..3f8808d 100644 --- a/xngin-plan/src/rule/joingraph_initialize.rs +++ b/doradb-plan/src/rule/joingraph_initialize.rs @@ -6,8 +6,8 @@ use bitflags::bitflags; use indexmap::IndexMap; use std::collections::HashSet; use std::mem; -use xngin_expr::controlflow::{Branch, ControlFlow, Unbranch}; -use xngin_expr::{ExprKind, QueryID}; +use doradb_expr::controlflow::{Branch, ControlFlow, Unbranch}; +use doradb_expr::{ExprKind, ExprExt, QueryID}; bitflags! 
{ struct Spec: u8 { diff --git a/xngin-plan/src/rule/mod.rs b/doradb-plan/src/rule/mod.rs similarity index 95% rename from xngin-plan/src/rule/mod.rs rename to doradb-plan/src/rule/mod.rs index 4fabf32..750d07b 100644 --- a/xngin-plan/src/rule/mod.rs +++ b/doradb-plan/src/rule/mod.rs @@ -4,19 +4,19 @@ use crate::error::Result; use crate::lgc::{LgcPlan, QuerySet}; use bitflags::bitflags; -use xngin_expr::{Effect, QueryID}; +use doradb_expr::{Effect, QueryID}; -pub(self) mod assign_id; pub mod col_prune; pub mod derived_unfold; pub mod expr_simplify; pub mod joingraph_initialize; pub mod op_eliminate; +pub(self) mod op_id; pub mod outerjoin_reduce; pub mod pred_move; -pub mod pred_pullup; +pub mod pred_pullup_old; pub mod pred_pushdown; -// pub mod output_fix; +pub mod pred_pushdown_old; pub use col_prune::col_prune; pub use derived_unfold::derived_unfold; @@ -24,8 +24,8 @@ pub use expr_simplify::expr_simplify; pub use joingraph_initialize::joingraph_initialize; pub use op_eliminate::op_eliminate; pub use outerjoin_reduce::outerjoin_reduce; -pub use pred_pullup::pred_pullup; -pub use pred_pushdown::pred_pushdown; +pub use pred_pullup_old::pred_pullup; +pub use pred_pushdown_old::pred_pushdown; // pub use fix::fix; bitflags! 
{ diff --git a/xngin-plan/src/rule/op_eliminate.rs b/doradb-plan/src/rule/op_eliminate.rs similarity index 97% rename from xngin-plan/src/rule/op_eliminate.rs rename to doradb-plan/src/rule/op_eliminate.rs index 3021569..10000e8 100644 --- a/xngin-plan/src/rule/op_eliminate.rs +++ b/doradb-plan/src/rule/op_eliminate.rs @@ -5,8 +5,8 @@ use crate::rule::expr_simplify::{update_simplify_nested, NullCoalesce}; use crate::rule::RuleEffect; use std::collections::HashSet; use std::mem; -use xngin_expr::controlflow::{Branch, ControlFlow, Unbranch}; -use xngin_expr::{Col, ColKind, Const, ExprKind, QueryID, Setq}; +use doradb_expr::controlflow::{Branch, ControlFlow, Unbranch}; +use doradb_expr::{Col, ColKind, Const, ExprKind, QueryID, Setq}; /// Eliminate redundant operators. /// 1. Filter with true predicate can be removed. @@ -122,6 +122,7 @@ impl<'a> EliminateOp<'a> { q, left, right, + .. } = so.as_mut(); match (&mut left.kind, &mut right.kind) { (OpKind::Empty, OpKind::Empty) => { @@ -581,7 +582,7 @@ mod tests { _ => (), })); if let OpKind::Proj { cols, .. } = &subq.root.kind { - assert_eq!(&cols.as_ref().unwrap()[0].expr, &ExprKind::const_null()); + assert_eq!(&cols[0].expr, &ExprKind::const_null()); } else { panic!("fail") } @@ -605,7 +606,7 @@ mod tests { _ => (), })); if let OpKind::Proj { cols, .. 
} = &subq.root.kind { - assert_ne!(&cols.as_ref().unwrap()[0].expr, &ExprKind::const_null()); + assert_ne!(&cols[0].expr, &ExprKind::const_null()); } else { panic!("fail") } @@ -634,8 +635,8 @@ mod tests { _ => (), })); if let OpKind::Proj{cols, ..} = &subq.root.kind { - assert_eq!(&cols.as_ref().unwrap()[0].expr, &ExprKind::const_null()); - assert_ne!(&cols.as_ref().unwrap()[1].expr, &ExprKind::const_null()); + assert_eq!(&cols[0].expr, &ExprKind::const_null()); + assert_ne!(&cols[1].expr, &ExprKind::const_null()); } else { panic!("fail") } @@ -653,8 +654,8 @@ mod tests { _ => (), })); if let OpKind::Proj{cols, ..} = &subq.root.kind { - assert_ne!(&cols.as_ref().unwrap()[0].expr, &ExprKind::const_null()); - assert_eq!(&cols.as_ref().unwrap()[1].expr, &ExprKind::const_null()); + assert_ne!(&cols[0].expr, &ExprKind::const_null()); + assert_eq!(&cols[1].expr, &ExprKind::const_null()); } else { panic!("fail") } diff --git a/xngin-plan/src/rule/assign_id.rs b/doradb-plan/src/rule/op_id.rs similarity index 57% rename from xngin-plan/src/rule/assign_id.rs rename to doradb-plan/src/rule/op_id.rs index 267a734..8eb56ae 100644 --- a/xngin-plan/src/rule/assign_id.rs +++ b/doradb-plan/src/rule/op_id.rs @@ -1,12 +1,14 @@ use crate::error::{Error, Result}; use crate::lgc::{Op, OpKind, OpMutVisitor, QuerySet}; -use xngin_expr::controlflow::{Branch, ControlFlow, Unbranch}; -use xngin_expr::{GlobalID, QueryID}; +use doradb_expr::controlflow::{Branch, ControlFlow, Unbranch}; +use doradb_expr::{GlobalID, QueryID, INVALID_GLOBAL_ID}; +/// Assign unique id to each operator. 
#[inline] -pub(super) fn assign_id(qry_set: &mut QuerySet, qry_id: QueryID) -> Result<()> { - let mut gid = GlobalID::from(1); - do_assign_id(qry_set, qry_id, &mut gid) +pub(super) fn assign_id(qry_set: &mut QuerySet, qry_id: QueryID) -> Result { + let mut gid = INVALID_GLOBAL_ID; + do_assign_id(qry_set, qry_id, &mut gid)?; + Ok(gid) } #[inline] @@ -17,6 +19,15 @@ fn do_assign_id(qry_set: &mut QuerySet, qry_id: QueryID, gid: &mut GlobalID) -> })? } +/// Reset unique id to 0 for each operator. +#[inline] +pub(super) fn reset_id(qry_set: &mut QuerySet, qry_id: QueryID) -> Result<()> { + qry_set.transform_op(qry_id, |qry_set, _, op| { + let mut ri = ResetID(qry_set); + op.walk_mut(&mut ri).unbranch() + })? +} + struct AssignID<'a> { qry_set: &'a mut QuerySet, gid: &'a mut GlobalID, @@ -31,11 +42,33 @@ impl OpMutVisitor for AssignID<'_> { match &mut op.kind { OpKind::Query(qid) => { do_assign_id(self.qry_set, *qid, self.gid).branch()?; - op.id = self.gid.fetch_inc(); + op.id = self.gid.inc_fetch(); + ControlFlow::Continue(()) + } + _ => { + op.id = self.gid.inc_fetch(); + ControlFlow::Continue(()) + } + } + } +} + +struct ResetID<'a>(&'a mut QuerySet); + +impl OpMutVisitor for ResetID<'_> { + type Cont = (); + type Break = Error; + + #[inline] + fn leave(&mut self, op: &mut Op) -> ControlFlow { + match &mut op.kind { + OpKind::Query(qid) => { + reset_id(self.0, *qid).branch()?; + op.id = INVALID_GLOBAL_ID; ControlFlow::Continue(()) } _ => { - op.id = self.gid.fetch_inc(); + op.id = INVALID_GLOBAL_ID; ControlFlow::Continue(()) } } @@ -72,6 +105,34 @@ mod tests { ) } + #[test] + fn test_reset_id() { + assert_j_plan("select 1 from t1", |sql, mut plan| { + assign_id(&mut plan.qry_set, plan.root).unwrap(); + reset_id(&mut plan.qry_set, plan.root).unwrap(); + print_plan(sql, &plan); + let ids = collect_id(&mut plan.qry_set, plan.root).unwrap(); + assert_eq!(ids, vec![0; 3]); + }); + assert_j_plan("select c1 from t1 where c0 = 0", |sql, mut plan| { + assign_id(&mut 
plan.qry_set, plan.root).unwrap(); + reset_id(&mut plan.qry_set, plan.root).unwrap(); + print_plan(sql, &plan); + let ids = collect_id(&mut plan.qry_set, plan.root).unwrap(); + assert_eq!(ids, vec![0; 4]); + }); + assert_j_plan( + "select 1 from t1, t3 where t1.c1 = t3.c3", + |sql, mut plan| { + assign_id(&mut plan.qry_set, plan.root).unwrap(); + reset_id(&mut plan.qry_set, plan.root).unwrap(); + print_plan(sql, &plan); + let ids = collect_id(&mut plan.qry_set, plan.root).unwrap(); + assert_eq!(ids, vec![0; 7]); + }, + ) + } + fn collect_id(qry_set: &mut QuerySet, qry_id: QueryID) -> Result> { let mut ids = vec![]; do_collect_id(qry_set, qry_id, &mut ids)?; diff --git a/xngin-plan/src/rule/outerjoin_reduce.rs b/doradb-plan/src/rule/outerjoin_reduce.rs similarity index 98% rename from xngin-plan/src/rule/outerjoin_reduce.rs rename to doradb-plan/src/rule/outerjoin_reduce.rs index bf1f3d4..4e43be8 100644 --- a/xngin-plan/src/rule/outerjoin_reduce.rs +++ b/doradb-plan/src/rule/outerjoin_reduce.rs @@ -5,9 +5,9 @@ use crate::rule::expr_simplify::{simplify_single, NullCoalesce}; use crate::rule::RuleEffect; use std::collections::{HashMap, HashSet}; use std::mem; -use xngin_expr::controlflow::{Branch, ControlFlow, Unbranch}; -use xngin_expr::fold::Fold; -use xngin_expr::{Col, ColKind, ExprKind, ExprMutVisitor, QueryID}; +use doradb_expr::controlflow::{Branch, ControlFlow, Unbranch}; +use doradb_expr::fold::Fold; +use doradb_expr::{Col, ColKind, ExprKind, ExprExt, ExprMutVisitor, QueryID}; /// Reduce outer join based on predicate analysis. /// This rule recognize null rejecting predicates in advance @@ -205,7 +205,7 @@ fn translate_rn_exprs( exprs: &[ExprKind], mapping: &[ProjCol], ) -> Result>> { - let mut res = HashMap::new(); + let mut res: HashMap> = HashMap::new(); let mut tmp = HashSet::new(); for e in exprs { let mut new_e = e.clone(); @@ -222,9 +222,7 @@ fn translate_rn_exprs( } for new_qid in &tmp { if reject_null_single(&new_e, *new_qid)? 
{ - res.entry(*new_qid) - .or_insert_with(Vec::new) - .push(new_e.clone()); + res.entry(*new_qid).or_default().push(new_e.clone()); } } } diff --git a/doradb-plan/src/rule/pred_move.rs b/doradb-plan/src/rule/pred_move.rs new file mode 100644 index 0000000..5110cd8 --- /dev/null +++ b/doradb-plan/src/rule/pred_move.rs @@ -0,0 +1,2732 @@ +use crate::error::{Error, Result}; +use crate::join::{Join, JoinKind, JoinOp, QualifiedJoin}; +use crate::lgc::{Op, OpKind, OpMutVisitor, ProjCol, QuerySet, SetopKind}; +use crate::rule::op_id::assign_id; +use smallvec::{smallvec, SmallVec}; +use std::collections::hash_map::Entry; +use std::collections::{HashMap, HashSet}; +use std::mem; +use doradb_catalog::{Catalog, Key, TableID, TblCol}; +use doradb_expr::controlflow::{Branch, ControlFlow, Unbranch}; +use doradb_expr::{ + Col, ColIndex, ColKind, Const, ExprKind, ExprExt, ExprMutVisitor, ExprVisitor, FnlDep, GlobalID, Pred, + PredFuncKind, QueryID, QryCol, +}; + +/// Predicate Movearound +/// +/// This is a combination of predicate pushdown (PPD) and predicate pullup (PPU). +/// PPU pulls all predicates as high as possible, and stores them into a separate +/// map, from which they can be pushed down later. Meanwhile, equal sets of columns will +/// be generated. +/// +#[inline] +pub fn pred_move(qry_set: &mut QuerySet, qry_id: QueryID) -> Result<()> { + assign_id(qry_set, qry_id)?; + todo!() +} + +/// Predicate pullup +/// +/// Traverse the query plan and pull up predicates if possible, +/// returns the inner set of root operator +#[inline] +fn pred_pullup( + catalog: &C, + qry_set: &mut QuerySet, + qry_id: QueryID, + pred_map: &mut PredMap, + dep_cols: &mut DepCols, +) -> Result<()> { + qry_set.transform_op(qry_id, |qry_set, _, op| { + let mut ppu = PredPullup { + catalog, + qry_set, + pred_map, + dep_cols, + }; + op.walk_mut(&mut ppu).unbranch() + })?
+} + +#[inline] +fn clear_filt(qry_set: &mut QuerySet, qry_id: QueryID) -> Result<()> { + qry_set.transform_op(qry_id, |qry_set, _, op| { + let mut cf = ClearFilt(qry_set); + op.walk_mut(&mut cf).unbranch() + })? +} + +/// PredMap stores inner set and outer set of each node. +/// The inner set is free to pull or push across nodes. +/// The outer set is only for join node itself. +#[derive(Debug, Default)] +pub struct PredMap { + pub(super) inner: HashMap, + pub(super) outer: HashMap, + pub(super) qry_col_map: HashMap, + // note that same table can be referred in a query multiple times. + // so global id is stored in a set. + pub(super) tbl_col_map: HashMap>, + pub(super) col_map: HashMap, + pub(super) tbl_dep_map: HashMap>>, + // pub(super) tbl_occurrence: HashMap, +} + +impl PredMap { + /// remove and get a inner predicate set of given node. + #[inline] + pub(super) fn remove_inner(&mut self, op_id: GlobalID) -> InnerSet { + self.inner.remove(&op_id).unwrap_or_default() + } + + #[inline] + pub(super) fn get_inner(&self, op_id: GlobalID) -> Option<&InnerSet> { + self.inner.get(&op_id) + } + + #[inline] + pub(super) fn merge_inner(&mut self, op_id: GlobalID, mut inner: InnerSet) { + if inner.is_empty() { + return + } + match self.inner.entry(op_id) { + Entry::Occupied(occ) => { + + inner.merge(occ.remove()); + self.inner.insert(op_id, inner); + } + Entry::Vacant(vac) => { + vac.insert(inner); + } + } + } + + #[inline] + pub(super) fn extract_and_merge_inner(&mut self, op_id: GlobalID, mut inner: InnerSet) -> InnerSet { + if inner.is_empty() { + return self.remove_inner(op_id) + } + match self.inner.entry(op_id) { + Entry::Occupied(occ) => { + inner.merge(occ.remove()); + } + Entry::Vacant(vac) => (), + } + inner + } + + #[inline] + pub(super) fn insert_inner(&mut self, op_id: GlobalID, inner_set: InnerSet) { + self.inner.insert(op_id, inner_set); + } + + #[inline] + pub(super) fn insert_outer(&mut self, op_id: GlobalID, outer_set: OuterSet) { + 
self.outer.insert(op_id, outer_set); + } + + #[inline] + pub(super) fn cols_eq_pred(&self, l_gid: GlobalID, r_gid: GlobalID) -> ExprKind { + let l_col = self.col_map[&l_gid].clone(); + let r_col = self.col_map[&r_gid].clone(); + ExprKind::pred_func( + PredFuncKind::Equal, + vec![ExprKind::Col(l_col), ExprKind::Col(r_col)], + ) + } + + #[inline] + pub fn apply_inner(&self, inner: InnerSet, dst: &mut Vec) { + // equal set to expression + for eq_set in inner.eq_sets { + let mut eq_set_iter = eq_set.into_iter(); + let l_gid = eq_set_iter.next().unwrap(); + while let Some(r_gid) = eq_set_iter.next() { + let e = self.cols_eq_pred(l_gid, r_gid); + dst.push(e); + } + } + // filter expression + for p in inner.filt { + let e = self.reflect_pred(p); + dst.push(e); + } + // do nothing to functional dependencies + } + + #[inline] + pub(super) fn reflect_pred(&self, p: PredExpr) -> ExprKind { + match p { + PredExpr::OneCol(_, e) | PredExpr::Other(e) => e, + PredExpr::ColEqConst(gid, cst) => { + let col = self.col_map[&gid].clone(); + ExprKind::pred_func( + PredFuncKind::Equal, + vec![ExprKind::Col(col), ExprKind::Const(cst)], + ) + } + PredExpr::TwoColEq(gid1, gid2) => { + let col1 = self.col_map[&gid1].clone(); + let col2 = self.col_map[&gid2].clone(); + ExprKind::pred_func( + PredFuncKind::Equal, + vec![ExprKind::Col(col1), ExprKind::Col(col2)], + ) + } + } + } +} + +/// InnerSet stores predicates and equal set of columns. +/// The columns source from inner join and filter operator. +/// +/// e.g.1. t1 JOIN t2 ON t1.c1 = t2.c2 JOIN t3 ON t1.c1 = t3.c3 +/// inner=[{t1.c1, t2.c2, t3.c3}], outer={} +/// +/// e.g.2. t1 JOIN t2 ON t1.c1 = t2.c2 JOIN t3 ON t2.c5 = t3.c6 +/// inner=[{t1.c1, t2.c2}, {t2.c5, t3.c6}], outer={} +/// +/// e.g.3. t1 LEFT JOIN t2 ON t1.c1 = t2.c2 LEFT JOIN t3 ON t1.c1 = t3.c3 +/// inner=[], outer={t1.c1 -> t2.c2, t1.c1 -> t3.c3} +/// +/// e.g.4. 
t1 LEFT JOIN t2 ON t1.c1 = t2.c2 LEFT JOIN t3 ON t2.c2 = t3.c3 +/// inner=[], outer={t1.c1 -> t2.c2, t2.c2 -> t3.c3} +/// +/// e.g.5. t1 LEFT JOIN t2 ON t1.c1 = t2.c2 JOIN t3 ON t2.c2 = t3.c3 +/// inner=[{t2.c2, t3.c3}], outer={t1.c1 -> t2.c2} +/// This case actually will not happen, because we run outerjoin reduce +/// rule before the predicate movearound. the first LEFT join will be +/// converted to INNER join as there is a null-rejected predicate on +/// join column of right table. +/// +/// e.g.6. t1 JOIN t2 ON t1.c1 = t2.c2 LEFT JOIN t3 ON t2.c2 = t3.c3 +/// inner=[{t1.c1, t2.c2}], outer={t2.c2 -> t3.c3, t1.c1 -> t3.c3} +#[derive(Debug, Default, Clone)] +pub struct InnerSet { + /// equal set of inner join + pub(super) eq_sets: Vec>, + /// predicates other than equal columns. + /// if node is join, it stores join conditions, + /// otherwise filter expressions. + pub(super) filt: Vec, + /// Functional dependency + /// + /// The meaning is one or more source columns can determine a target column value, + /// we say the target column has functional dependency of source columns. + /// + /// There are two kinds of functional dependencies: + /// 1. from projection. + /// e.g. SELECT c1, c1+1 as c2 FROM t1 + /// Here c2's value is determined by c1. so there is one dependency c2 -> c1+1 + /// Such dependency is not important, because if we have a predicate on top of the tree, + /// when we push it down through query blocks, it will be converted to suitable one. + /// If we have a predicate at leaf of the tree, we can pull multiple derived predicates + /// due to the functional dependency. + /// e.g. SELECT c1, c1+1 as c2 FROM t1 WHERE c1+1 > 0 + /// There are two possible predicates: [c1+1>0, c2>0]. Once we push them down to leaf, + /// the duplicates can be eliminated and only one remains: c1+1>0. + /// + /// 2. from table constraints, such as primary key, unique key. + /// e.g. Table T has columns c1, c2, c3. c1 is primary key. 
+ /// So values of c2, c3 are all determined by c1. + /// If there is a predicate c2 > 0, we can generate one dependency c2 -> col(T, 1, c1). + /// + /// Here is one example of predicate move-around by using functional dependency. + /// + /// select * from (select c1, c1+1 as c2 from t1) t1, t2 where t1.c1 = t2.c2 and t1.c2 > 0 + /// + /// Once all predicates are pulled up to the root node, there are: + /// 1. equal set: {t1.c1, t2.c2} + /// 2. filter: [t1.c2 > 0] + /// 3. functional dependency: t1.c2 -> t1.c1+1 + /// + /// Now we can first derive t1.c1+1>0, from #2, #3. + /// Then we can derive t2.c2+1>0 by using #1. + pub(super) fnl_deps: HashMap>, +} + +impl InnerSet { + #[inline] + pub fn is_empty(&self) -> bool { + self.eq_sets.is_empty() && self.filt.is_empty() && self.fnl_deps.is_empty() + } + + /// Retain the differences from other inner set. + /// This is used when the filters cannot be pushed down to the child operator. + #[inline] + pub fn retain_diff(&mut self, other: &InnerSet) { + // retain different eq set. + let mut intersection: Vec = vec![]; + for eq_set in &mut self.eq_sets { + if eq_set.len() > 1 { // skip eq sets with fewer than two columns, they are not meaningful. + for other_set in &other.eq_sets { + intersection.clear(); + intersection.extend(eq_set.intersection(other_set)); + if intersection.len() > 1 { + for gid in &intersection[1..] { + eq_set.remove(gid); + } + } + } + } + } + self.eq_sets.retain(|eq_set| eq_set.len() > 1); + // retain different filter expression. + self.filt.retain(|e| !other.filt.contains(e)); + } + + #[inline] + pub fn merge(&mut self, new: InnerSet) { + // merge inner equal sets + for s in new.eq_sets { + self.handle_eq_set(s); + } + // merge predicates + for p in new.filt { + self.handle_filt(p); + } + for (gid, deps) in new.fnl_deps { + for dep in deps { + self.handle_dep(gid, dep); + } + } + } + + #[inline] + pub fn handle_eq_set(&mut self, eq_set: HashSet) { + merge_eq_sets(&mut self.eq_sets, eq_set); + } + + /// handle filter expressions.
+ #[inline] + pub fn handle_filt(&mut self, pred: PredExpr) { + match &pred { + PredExpr::OneCol(..) | PredExpr::Other(_) | PredExpr::ColEqConst(..) => { + for p in &self.filt { + if p == &pred { + return; + } + } + self.filt.push(pred); + } + PredExpr::TwoColEq(gid1, gid2) => { + let mut eq_set = HashSet::new(); + eq_set.insert(*gid1); + eq_set.insert(*gid2); + merge_eq_sets(&mut self.eq_sets, eq_set); + } + } + } + + #[inline] + pub fn handle_dep(&mut self, tgt_gid: GlobalID, expr: ExprKind) { + if let ExprKind::Const(c) = expr { + self.handle_filt(PredExpr::ColEqConst(tgt_gid, c)); + return; + } + self.fnl_deps.entry(tgt_gid).or_default().push(expr); + } + + #[inline] + pub fn total_fnl_deps(&self) -> usize { + self.fnl_deps.values().map(|v| v.len()).sum() + } +} + +/// Outer set stores unidirectional equal set of columns and join +/// conditions for outer(left/full) join. +#[derive(Debug, Default)] +pub struct OuterSet { + /// unidirectional equal column mapping of outer(left) join + eq_map: HashMap>, + /// join conditions + cond: Vec, +} + +impl OuterSet { + /// handle outer join condition + #[inline] + pub fn handle_cond( + &mut self, + cond: PredExpr, + side: &JoinSide, + col_map: &HashMap, + ) { + match &cond { + PredExpr::OneCol(..) | PredExpr::Other(_) | PredExpr::ColEqConst(..) => { + self.cond.push(cond); + } + PredExpr::TwoColEq(gid1, gid2) => { + match side { + // full join + JoinSide::Full => { + self.cond.push(cond); + } + // left join + JoinSide::Right(_) => match ( + side.left(col_map[&gid1].kind.qry_id().unwrap()), + side.left(col_map[&gid2].kind.qry_id().unwrap()), + ) { + (true, true) => { + // both left side + self.cond.push(cond); + } + (false, false) => { + // both right side, let predicate pushdown to push to right child. 
+ self.cond.push(cond); + } + (true, false) => { + // gid1 from left side, gid2 from right side + self.eq_map.entry(*gid1).or_default().insert(*gid2); + } + (false, true) => { + // gid2 from left side, gid1 from right side + self.eq_map.entry(*gid2).or_default().insert(*gid1); + } + }, + } + } + } + } +} + +#[inline] +fn merge_eq_sets(dst: &mut Vec>, new: HashSet) { + let mut match_indexes: SmallVec<[_; 2]> = smallvec![]; + for (i, d_set) in dst.iter().enumerate() { + if !d_set.is_disjoint(&new) { + match_indexes.push(i); + } + } + if match_indexes.is_empty() { + dst.push(new); + } else { + for s in match_indexes[1..].iter().rev() { + let set2 = dst.remove(*s); + let set1 = &mut dst[match_indexes[0]]; + for gid in set2 { + set1.insert(gid); + } + } + let set1 = &mut dst[match_indexes[0]]; + for gid in new { + set1.insert(gid); + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum PredExpr { + // predicate involves one column, e.g. c1 > 0, c1 = 1, c1 like 'abc' + OneCol(GlobalID, ExprKind), + // column equal const + ColEqConst(GlobalID, Const), + // two columns equal, e.g. c1 = c2. + TwoColEq(GlobalID, GlobalID), + Other(ExprKind), +} + +impl PredExpr { + #[inline] + pub(super) fn include_col(&self, gid: GlobalID) -> bool { + match self { + PredExpr::OneCol(g, ..) | PredExpr::ColEqConst(g, _) => *g == gid, + PredExpr::TwoColEq(g1, g2) => *g1 == gid || *g2 == gid, + PredExpr::Other(e) => include_col(e, gid), + } + } + + #[inline] + pub(super) fn find_fnl_dep(&self) -> HashSet { + match self { + PredExpr::ColEqConst(..) | PredExpr::TwoColEq(..) 
=> HashSet::new(), + PredExpr::OneCol(_, e) | PredExpr::Other(e) => { + let mut fd = FindFnlDep(HashSet::new()); + let _ = e.walk(&mut fd); + fd.0 + } + } + } + + #[inline] + pub(super) fn replace_col_with_expr( + &self, + gid: GlobalID, + expr: &ExprKind, + col_map: &HashMap, + ) -> Self { + match self { + PredExpr::OneCol(g, e) => { + if *g == gid { + let mut new_e = e.clone(); + update_col_inplace(&mut new_e, gid, expr); + return new_e.into(); + } + self.clone() + } + PredExpr::ColEqConst(g, cst) => { + if *g == gid { + let new_e = ExprKind::pred_func( + PredFuncKind::Equal, + vec![expr.clone(), ExprKind::Const(cst.clone())], + ); + return new_e.into(); + } + self.clone() + } + PredExpr::TwoColEq(gid1, gid2) => { + if *gid1 == gid { + let c2 = col_map[gid2].clone(); + let new_e = ExprKind::pred_func( + PredFuncKind::Equal, + vec![ExprKind::Col(c2), expr.clone()], + ); + return new_e.into(); + } + if *gid2 == gid { + let c1 = col_map[gid1].clone(); + let new_e = ExprKind::pred_func( + PredFuncKind::Equal, + vec![ExprKind::Col(c1), expr.clone()], + ); + return new_e.into(); + } + self.clone() + } + PredExpr::Other(e) => { + let mut new_e = e.clone(); + update_col_inplace(&mut new_e, gid, expr); + new_e.into() + } + } + } + + /// replace one column with given expression. 
+ /// If the expression contains more columns, do nothing + #[inline] + pub(super) fn replace_cols(&self, col_map: &HashMap) -> Option { + match self { + PredExpr::OneCol(gid, e) => { + if let Some(col) = col_map.get(gid) { + let mut new_e = e.clone(); + update_cols_inplace(&mut new_e, col_map); + Some(PredExpr::OneCol(col.gid, new_e)) + } else { + None + } + } + PredExpr::ColEqConst(gid, cst) => { + if let Some(c) = col_map.get(gid) { + Some(PredExpr::ColEqConst(c.gid, cst.clone())) + } else { + None + } + } + PredExpr::TwoColEq(gid1, gid2) => { + if let Some(c1) = col_map.get(gid1) { + if let Some(c2) = col_map.get(gid2) { + return Some(PredExpr::TwoColEq(c1.gid, c2.gid)); + } + } + None + } + PredExpr::Other(e) => { + let mut new_e = e.clone(); + let mut rc = ReplaceCols(col_map); + if new_e.walk_mut(&mut rc).is_continue() { + return Some(PredExpr::Other(new_e)); + } + None + } + } + } + + /// replace expressions with columns + #[inline] + pub(super) fn replace_exprs_with_cols( + &self, + e2c: &HashMap, + c2c: &HashMap, + ) -> Option { + match self { + PredExpr::OneCol(_, e) | PredExpr::Other(e) => { + let mut new_e = e.clone(); + let mut ro = RewriteExprOut { + e2c, + c2c, + expr_replaced: false, + }; + if new_e.walk_mut(&mut ro).is_break() { + return None; + } + Some(new_e.into()) + } + PredExpr::ColEqConst(gid, cst) => { + if let Some(c) = c2c.get(gid) { + let new_e = ExprKind::pred_func( + PredFuncKind::Equal, + vec![ExprKind::Col(c.clone()), ExprKind::Const(cst.clone())], + ); + return Some(new_e.into()); + } + None + } + PredExpr::TwoColEq(gid1, gid2) => { + if let Some(c1) = c2c.get(gid1) { + if let Some(c2) = c2c.get(gid2) { + let new_e = ExprKind::pred_func( + PredFuncKind::Equal, + vec![ExprKind::Col(c1.clone()), ExprKind::Col(c2.clone())], + ); + return Some(new_e.into()); + } + } + None + } + } + } + + #[inline] + pub(super) fn replace_cols_with_exprs(&self, c2e: &HashMap) -> Option { + match self { + PredExpr::OneCol(_, e) | PredExpr::Other(e) => { 
+ let mut new_e = e.clone(); + let mut ri = RewriteExprIn(c2e); + if new_e.walk_mut(&mut ri).is_break() { + return None; + } + Some(new_e.into()) + } + PredExpr::ColEqConst(gid, cst) => { + if let Some(e) = c2e.get(gid) { + let new_e = ExprKind::pred_func( + PredFuncKind::Equal, + vec![e.clone(), ExprKind::Const(cst.clone())], + ); + return Some(new_e.into()); + } + None + } + PredExpr::TwoColEq(gid1, gid2) => { + if let Some(e1) = c2e.get(gid1) { + if let Some(e2) = c2e.get(gid2) { + let new_e = ExprKind::pred_func( + PredFuncKind::Equal, + vec![e1.clone(), e2.clone()], + ); + return Some(new_e.into()); + } + } + None + } + } + } + + /// check if all columns included in given set. + #[inline] + pub(super) fn all_cols_included(&self, col_set: &HashSet) -> bool { + match self { + PredExpr::OneCol(gid, _) => col_set.contains(gid), + PredExpr::TwoColEq(gid1, gid2) => col_set.contains(gid1) && col_set.contains(gid2), + PredExpr::ColEqConst(gid, _) => col_set.contains(gid), + PredExpr::Other(e) => { + let mut aci = AllColsIncluded(col_set); + e.walk(&mut aci).is_continue() + } + } + } + + /// convert predicate to expression back + #[inline] + pub(super) fn into_expr(self, col_map: &HashMap) -> Option { + match self { + PredExpr::OneCol(gid, e) => { + if col_map.contains_key(&gid) { + let mut new_e = e.clone(); + update_cols_inplace(&mut new_e, col_map); + Some(new_e) + } else { + None + } + } + PredExpr::ColEqConst(gid, cst) => { + if let Some(c) = col_map.get(&gid) { + Some(ExprKind::pred_func(PredFuncKind::Equal, vec![ExprKind::Col(c.clone()), ExprKind::Const(cst)])) + } else { + None + } + } + PredExpr::TwoColEq(gid1, gid2) => { + if let Some(c1) = col_map.get(&gid1) { + if let Some(c2) = col_map.get(&gid2) { + return Some(ExprKind::pred_func(PredFuncKind::Equal, vec![ExprKind::Col(c1.clone()), ExprKind::Col(c2.clone())])); + } + } + None + } + PredExpr::Other(e) => { + let mut new_e = e.clone(); + let mut rc = ReplaceCols(col_map); + if new_e.walk_mut(&mut 
rc).is_continue() { + return Some(new_e); + } + None + } + } + } + + /// Returns whether the predicate expression contains aggregation functions. + #[inline] + pub fn contains_aggr_func(&self) -> bool { + match self { + PredExpr::TwoColEq(..) | PredExpr::ColEqConst(..) => false, + PredExpr::OneCol(_, e) | PredExpr::Other(e) => e.contains_aggr_func(), + } + } +} + +impl From for PredExpr { + /// Classify an expression and convert to predicate expression, + /// All used column info must be stored in the column mapping before this call. + /// This is guaranteed by preorder processing. + #[inline] + fn from(expr: ExprKind) -> PredExpr { + if let ExprKind::Pred(Pred::Func { + kind: PredFuncKind::Equal, + args, + }) = &expr + { + match args.as_slice() { + [ExprKind::Col(Col { gid: gid1, .. }), ExprKind::Col(Col { gid: gid2, .. })] => { + if gid1 != gid2 { + return PredExpr::TwoColEq(*gid1, *gid2); + } + } + [ExprKind::Col(Col { gid, .. }), ExprKind::Const(c)] + | [ExprKind::Const(c), ExprKind::Col(Col { gid, .. 
})] => { + return PredExpr::ColEqConst(*gid, c.clone()) + } + _ => (), + } + } + let mut csc = CheckSinglePlainCol(None); + if expr.walk(&mut csc).is_break() || csc.0.is_none() { + PredExpr::Other(expr) + } else { + PredExpr::OneCol(csc.0.unwrap(), expr) + } + } +} +pub enum JoinSide { + Right(HashSet), // only for left join + Full, // only for full join +} + +impl JoinSide { + #[inline] + fn left(&self, qid: QueryID) -> bool { + if let JoinSide::Right(qids) = self { + return !qids.contains(&qid); + } + false + } +} + +struct PredPullup<'a, C> { + catalog: &'a C, + qry_set: &'a mut QuerySet, + pred_map: &'a mut PredMap, + // columns of potential functional dependencies + dep_cols: &'a mut DepCols, +} + +impl PredPullup<'_, C> { + #[inline] + fn collect_dep_cols(&mut self, exprs: &[ExprKind]) { + let mut cfd = CollectDepCols(self.dep_cols); + for e in exprs { + let _ = e.walk(&mut cfd); + } + } + + #[inline] + fn pullup_query(&mut self, op_id: GlobalID, qid: QueryID) -> ControlFlow { + pred_pullup( + self.catalog, + self.qry_set, + qid, + self.pred_map, + self.dep_cols, + ) + .branch()?; + let subq = self.qry_set.get(&qid).unwrap(); + let qry_root_id = subq.root.id; + let mut inner = self.pred_map.remove_inner(qry_root_id); + let mut new_inner = InnerSet::default(); + + // translate inner set by output mapping. + let mut col_out = HashMap::new(); + let mut expr_out = HashMap::new(); + for (i, e) in subq.out_cols().iter().enumerate() { + let qry_col = QryCol(qid, ColIndex::from(i as u32)); + // query column which are not in out map is not used by parent nodes. + // if column prune is executed, they are already removed. + if let Some(out_gid) = self.pred_map.qry_col_map.get(&qry_col) { + let col = Col { + gid: *out_gid, + kind: ColKind::Query(qry_col.0), + idx: qry_col.1, + }; + if let ExprKind::Col(Col { gid, .. 
}) = &e.expr { + col_out.insert(*gid, col); + } else { + // any other expressions can be put into expr output map + if let Entry::Vacant(vac) = expr_out.entry(e.expr.clone()) { + vac.insert(col); + } + } + } + } + // convert constant projections to equal predicates + for (e, c) in &expr_out { + if e.is_const() { + let p = ExprKind::pred_func( + PredFuncKind::Equal, + vec![ExprKind::Col(c.clone()), e.clone()], + ) + .into(); + new_inner.handle_filt(p); + } + } + + // translate functional dependencies + // handle old dependencies + for (tgt_gid, deps) in mem::take(&mut inner.fnl_deps) { + // check if the target of functional dependency is in output, + // if not, we try equal set. + let gid_opt = if let Some(out_col) = col_out.get(&tgt_gid) { + Some(out_col.gid) + } else if let Some(eq_set) = inner.eq_sets.iter().find(|s| s.contains(&tgt_gid)) { + // target not in output but has equal set. + if let Some(out_col) = eq_set.iter().find_map(|gid| col_out.get(gid)) { + // at least one column in equal set is also in output. + Some(out_col.gid) + } else { + None + } + } else { + None + }; + if let Some(final_gid) = gid_opt { + // check if src are all in output + for mut dep in deps { + let mut rc = ReplaceCols(&col_out); + if dep.walk_mut(&mut rc).is_continue() { + // all source columns can be translated to output column + new_inner.handle_dep(final_gid, dep); + } + } + } else { + // the target of dependency is not exported, try to convert predicates that is involved. + // e.g. SELECT c0 FROM t2 WHERE c2 > 0 + // even though c2 is not exported, we can convert and export predicate: + // TableDepFunc(t2, c2_idx, c0) > 0 + for p in &inner.filt { + if p.include_col(tgt_gid) { + // try to convert filter according to dependency. 
+ for dep in &deps { + let new_p = + p.replace_col_with_expr(tgt_gid, dep, &self.pred_map.col_map); + new_inner.handle_filt(new_p); + } + } + } + } + } + // translate equal set + for eq_set in &mut inner.eq_sets { + let new_set: HashSet<_> = eq_set + .iter() + .filter_map(|k| col_out.get(k).map(|c| c.gid)) + .collect(); + if new_set.len() > 1 { + // output at least two columns of this equal set, we need to remove new_set.len() - 1 + // columns of original set. + let mut new_set_iter = new_set.iter(); + for _ in 0..new_set.len() - 1 { + if let Some(k) = new_set_iter.next() { + eq_set.remove(k); + } + } + new_inner.eq_sets.push(new_set); + } // otherwise, keep original equal set as is + } + // translate filter + let mut old_filt = vec![]; + for p in inner.filt { + if let Some(new_p) = p.replace_exprs_with_cols(&expr_out, &col_out) { + new_inner.handle_filt(new_p); + // duplicate the predicate by only use out columns + if let Some(new_p) = p.replace_cols(&col_out) { + new_inner.handle_filt(new_p); + } + } else { + old_filt.push(p); + } + } + inner.filt = old_filt; + + println!( + "query fnl_deps={:?}, keys={:?}", + new_inner.total_fnl_deps(), + Vec::from_iter(new_inner.fnl_deps.keys()) + ); + // update back + self.pred_map.insert_inner(qry_root_id, inner); + // update new set + self.pred_map.insert_inner(op_id, new_inner); + ControlFlow::Continue(()) + } + + #[inline] + fn pullup_cross_join( + &mut self, + op_id: GlobalID, + children: &[JoinOp], + ) -> ControlFlow { + // pull up inner predicates of all children and merge + let (first, others) = children.split_first().unwrap(); + let mut inner = self.pred_map.remove_inner(first.id); + for r in others { + let r_inner = self.pred_map.remove_inner(r.id); + inner.merge(r_inner); + } + // store inner set + self.pred_map.insert_inner(op_id, inner); + ControlFlow::Continue(()) + } + + #[inline] + fn pullup_inner_join( + &mut self, + op_id: GlobalID, + left_id: GlobalID, + right_id: GlobalID, + cond: Vec, + filt: Vec, + ) -> 
ControlFlow { + // pull up inner predicates from left child + let mut inner = self.pred_map.remove_inner(left_id); + // pull up inner predicates from right child + let r = self.pred_map.remove_inner(right_id); + // merge right + inner.merge(r); + // merge conditions and filters + for e in cond { + inner.handle_filt(e.into()); + } + for e in filt { + inner.handle_filt(e.into()); + } + println!( + "inner fnl_deps={:?}, keys={:?}", + inner.total_fnl_deps(), + Vec::from_iter(inner.fnl_deps.keys()) + ); + // store inner set + self.pred_map.insert_inner(op_id, inner); + ControlFlow::Continue(()) + } + + #[inline] + fn pullup_left_join( + &mut self, + op_id: GlobalID, + left_id: GlobalID, + right: &JoinOp, + cond: Vec, + filt: Vec, + ) -> ControlFlow { + // pull up inner predicates of left child, + // and reject right child + let mut inner = self.pred_map.remove_inner(left_id); + // merge filters + for e in filt { + inner.handle_filt(e.into()); + } + // handle outer join conditions + let mut right_qids = HashSet::new(); + right.collect_qry_ids(&mut right_qids); + let side = JoinSide::Right(right_qids); + let mut outer = OuterSet::default(); + for e in cond { + outer.handle_cond(e.into(), &side, &self.pred_map.col_map); + } + // store inner set + self.pred_map.insert_inner(op_id, inner); + // store outer set + self.pred_map.insert_outer(op_id, outer); + ControlFlow::Continue(()) + } + + #[inline] + fn pullup_full_join( + &mut self, + op_id: GlobalID, + cond: Vec, + filt: Vec, + ) -> ControlFlow { + // reject either left or right child + if !filt.is_empty() { + let mut inner = InnerSet::default(); + for e in filt { + inner.handle_filt(e.into()); + } + // store inner set + self.pred_map.insert_inner(op_id, inner); + } + let mut outer = OuterSet::default(); + for e in cond { + outer.handle_cond(e.into(), &JoinSide::Full, &self.pred_map.col_map); + } + // store outer set + self.pred_map.insert_outer(op_id, outer); + ControlFlow::Continue(()) + } + + #[inline] + fn pullup_filt( 
+ &mut self, + op_id: GlobalID, + input_id: GlobalID, + pred: Vec, + ) -> ControlFlow { + let input_id = input_id; + // pull up all preds and equal sets of child. + let mut inner = self.pred_map.remove_inner(input_id); + for e in pred { + inner.handle_filt(e.into()); + } + println!( + "filter fnl_deps={:?}, keys={:?}", + inner.total_fnl_deps(), + Vec::from_iter(inner.fnl_deps.keys()) + ); + self.pred_map.insert_inner(op_id, inner); + ControlFlow::Continue(()) + } + + #[inline] + fn pullup_union( + &mut self, + op_id: GlobalID, + left_id: GlobalID, + right_id: GlobalID, + cols: &[ProjCol], + ) -> ControlFlow { + let mut left_inner = self.pred_map.remove_inner(left_id); + let mut right_inner = self.pred_map.remove_inner(right_id); + + let mut new_inner = InnerSet::default(); + // intersect equal sets of both sides + let (l_map, r_map) = setop_out_map(cols); + for l_eq_set in &mut left_inner.eq_sets { + let l_out: HashSet<_> = l_eq_set.iter().map(|gid| l_map[gid].gid).collect(); + for r_eq_set in &right_inner.eq_sets { + let r_out: HashSet<_> = r_eq_set.iter().map(|gid| r_map[gid].gid).collect(); + let new_set: HashSet = l_out.intersection(&r_out).cloned().collect(); + if new_set.len() > 1 { + new_inner.eq_sets.push(new_set); + } // otherwise, no change + } + } + // intersect filter expressions, currently only exactly identical expressions are supported.
+ // old filt of left child + let mut old_l_filt = vec![]; + // old filt of right child + let mut old_r_filt = vec![]; + let mut l_out_filt: Vec<_> = left_inner + .filt + .iter() + .map(|p| { + let new_p = p.replace_cols(&l_map).unwrap(); + (new_p, false) + }) + .collect(); + let mut r_out_filt: Vec<_> = right_inner + .filt + .iter() + .map(|p| { + let new_p = p.replace_cols(&r_map).unwrap(); + (new_p, false) + }) + .collect(); + for (l_out, l_flag) in &mut l_out_filt { + for (r_out, r_flag) in &mut r_out_filt { + if l_out == r_out { + *l_flag = true; + *r_flag = true; + } + } + } + for (old, (new, pullable)) in left_inner.filt.into_iter().zip(l_out_filt) { + if pullable { + new_inner.handle_filt(new); + } else { + old_l_filt.push(old); + } + } + left_inner.filt = old_l_filt; + for (old, (_, pullable)) in right_inner.filt.into_iter().zip(r_out_filt) { + if !pullable { + old_r_filt.push(old); + } + } + right_inner.filt = old_r_filt; + // update right back + self.pred_map.insert_inner(right_id, right_inner); + // update left back + self.pred_map.insert_inner(left_id, left_inner); + // update new set + self.pred_map.insert_inner(op_id, new_inner); + ControlFlow::Continue(()) + } + + #[inline] + fn pullup_intersect( + &mut self, + op_id: GlobalID, + left_id: GlobalID, + right_id: GlobalID, + cols: &[ProjCol], + ) -> ControlFlow { + let mut left_inner = self.pred_map.remove_inner(left_id); + let mut right_inner = self.pred_map.remove_inner(right_id); + // union equal sets of both side + let (l_map, r_map) = setop_out_map(cols); + let mut new_inner = InnerSet::default(); + for eq_set in &left_inner.eq_sets { + let new_set = eq_set.iter().map(|gid| l_map[gid].gid).collect(); + new_inner.handle_eq_set(new_set); + } + for eq_set in &right_inner.eq_sets { + let new_set = eq_set.iter().map(|gid| r_map[gid].gid).collect(); + new_inner.handle_eq_set(new_set); + } + // translate and union filt of both side + for p in mem::take(&mut left_inner.filt) { + let new_p = 
p.replace_cols(&l_map).unwrap(); + new_inner.handle_filt(new_p); + } + for p in mem::take(&mut right_inner.filt) { + let new_p = p.replace_cols(&r_map).unwrap(); + new_inner.handle_filt(new_p); + } + // translate functional dependency + if !left_inner.fnl_deps.is_empty() { + let fnl_deps = mem::take(&mut left_inner.fnl_deps); + for (gid, deps) in fnl_deps { + for mut dep in deps { + let new_gid = l_map[&gid].gid; + update_cols_inplace(&mut dep, &l_map); + new_inner.handle_dep(new_gid, dep); + } + } + } + if !right_inner.fnl_deps.is_empty() { + let fnl_deps = mem::take(&mut right_inner.fnl_deps); + for (gid, deps) in fnl_deps { + for mut dep in deps { + let new_gid = r_map[&gid].gid; + update_cols_inplace(&mut dep, &r_map); + new_inner.handle_dep(new_gid, dep); + } + } + } + // update right back + self.pred_map.insert_inner(right_id, right_inner); + // update left back + self.pred_map.insert_inner(left_id, left_inner); + // update new set + self.pred_map.insert_inner(op_id, new_inner); + ControlFlow::Continue(()) + } + + #[inline] + fn pullup_except( + &mut self, + op_id: GlobalID, + left_id: GlobalID, + cols: &[ProjCol], + ) -> ControlFlow { + let mut inner = self.pred_map.remove_inner(left_id); + // only left map is required + let mut l_map = HashMap::with_capacity(cols.len()); + for pc in cols { + if let ExprKind::Col( + c @ Col { + kind: ColKind::Setop(args), + .. 
+ }, + ) = &pc.expr + { + l_map.insert(args[0].col_gid().unwrap(), c.clone()); + } + } + // translate equal set + for eq_set in &mut inner.eq_sets { + let new_set: HashSet<_> = eq_set.iter().map(|gid| l_map[gid].gid).collect(); + *eq_set = new_set; + } + // translate filter + for p in &mut inner.filt { + let new_p = p.replace_cols(&l_map).unwrap(); + *p = new_p; + } + // translate functional dependency + if !inner.fnl_deps.is_empty() { + let fnl_deps = mem::take(&mut inner.fnl_deps); + for (gid, deps) in fnl_deps { + for mut dep in deps { + let new_gid = l_map[&gid].gid; + update_cols_inplace(&mut dep, &l_map); + inner.handle_dep(new_gid, dep); + } + } + } + self.pred_map.insert_inner(op_id, inner); + ControlFlow::Continue(()) + } + + #[inline] + fn pullup_aggr( + &mut self, + op_id: GlobalID, + input_id: GlobalID, + groups: &[ExprKind], + proj: &[ProjCol], + filt: Vec, + ) -> ControlFlow { + let mut inner = self.pred_map.remove_inner(input_id); + let group_out_cols = collect_group_out_cols(groups, proj); + let mut new_inner = InnerSet::default(); + // collect equal set + for eq_set in &mut inner.eq_sets { + let new_set: HashSet = eq_set + .iter() + .filter(|k| group_out_cols.contains(*k)) + .cloned() + .collect(); + if new_set.len() > 1 { + // output at least two columns of this equal set, we need to remove new_set.len() - 1 + // columns of original set. 
+ let mut new_set_iter = new_set.iter(); + for _ in 0..new_set.len() - 1 { + if let Some(k) = new_set_iter.next() { + eq_set.remove(k); + } + } + new_inner.handle_eq_set(new_set); + } // otherwise, keep original equal set as is + } + // collect filter + let mut old_filt = vec![]; + for p in inner.filt { + if p.all_cols_included(&group_out_cols) { + new_inner.handle_filt(p); + } else { + old_filt.push(p); + } + } + inner.filt = old_filt; + // update back + self.pred_map.insert_inner(input_id, inner); + // aggr may have filters + for e in filt { + new_inner.handle_filt(e.into()); + } + // update new set + self.pred_map.insert_inner(op_id, new_inner); + ControlFlow::Continue(()) + } + + #[inline] + fn collect_tbl_fnl_deps(&mut self, inner: &mut InnerSet, table_id: TableID, cols: &[ProjCol]) { + // note: table may be referred multiple times in one query, + // so we remove the table from dep_tbl_cols map when collecting its dependencies. + if let Some(col_indexes) = self.dep_cols.tbl.remove(&table_id) { + let keys = self.catalog.find_keys(&table_id); + if keys.is_empty() { + return; // no key in current table + } + let exported_cols: HashMap = cols + .iter() + .map(|c| { + if let ExprKind::Col(c @ Col { idx, .. }) = &c.expr { + (*idx, c.clone()) + } else { + unreachable!() + } + }) + .collect(); + + let keys: Vec<_> = keys + .iter() + .filter_map(|k| match k { + Key::PrimaryKey(cols) | Key::UniqueKey(cols) => { + if cols.iter().any(|c| !exported_cols.contains_key(&c.idx)) { + None + } else { + let mut idx_set = HashSet::new(); + let mut exprs = vec![]; + let mut tbl_cols = vec![]; + for c in cols { + idx_set.insert(c.idx); + exprs.push(ExprKind::Col(exported_cols[&c.idx].clone())); + tbl_cols.push(TblCol(c.table_id, c.idx)); + } + Some((idx_set, exprs, tbl_cols)) + } + } + }) + .collect(); + if keys.is_empty() { + return; // no key exported + } + // inject functional dependencies to scan node. 
+ for col_idx in col_indexes { + let gid = exported_cols[&col_idx].gid; + for (k_idx, k_exprs, k_cols) in &keys { + if k_idx.contains(&col_idx) { + continue; // skip if this column is part of key + } + let dep = ExprKind::tbl_fnl_dep(table_id, col_idx, k_exprs.clone()); + inner.handle_dep(gid, dep); + let tbl_col = TblCol(table_id, col_idx); + self.pred_map.tbl_dep_map.entry(tbl_col).or_default().push(k_cols.clone()); + } + } + } + } + + #[inline] + fn pullup_scan( + &mut self, + op_id: GlobalID, + table_id: TableID, + cols: &[ProjCol], + filt: Vec, + ) -> ControlFlow { + let mut inner = InnerSet::default(); + // handle filter expressions + for e in filt { + inner.handle_filt(e.into()); + } + // collect dependencies in table. + self.collect_tbl_fnl_deps(&mut inner, table_id, cols); + println!( + "tbl fnl_deps={:?}, keys={:?}", + inner.total_fnl_deps(), + Vec::from_iter(inner.fnl_deps.keys()) + ); + self.pred_map.insert_inner(op_id, inner); + ControlFlow::Continue(()) + } + + #[inline] + fn pullup_limit(&mut self, op_id: GlobalID, input_id: GlobalID) -> ControlFlow { + // we need to copy inner set because the predicate pushdown can not cross limit. 
+ let inner = self.pred_map.remove_inner(input_id); + self.pred_map.insert_inner(op_id, inner.clone()); + self.pred_map.insert_inner(input_id, inner); + ControlFlow::Continue(()) + } + + #[inline] + fn pullup_base(&mut self, op_id: GlobalID, input_id: GlobalID) -> ControlFlow { + let inner = self.pred_map.remove_inner(input_id); + self.pred_map.insert_inner(op_id, inner); + ControlFlow::Continue(()) + } +} + +impl OpMutVisitor for PredPullup<'_, C> { + type Cont = (); + type Break = Error; + + #[inline] + fn enter(&mut self, op: &mut Op) -> ControlFlow { + // collect column mapping + for e in op.kind.exprs() { + let mut cm = CollectColMapping { + qry_col_map: &mut self.pred_map.qry_col_map, + tbl_col_map: &mut self.pred_map.tbl_col_map, + col_map: &mut self.pred_map.col_map, + }; + let _ = e.walk(&mut cm); + } + // collect potential functional dependencies of table + match &op.kind { + OpKind::Filt { pred, .. } => self.collect_dep_cols(pred), + OpKind::Join(join) => match &**join { + Join::Qualified(QualifiedJoin { cond, filt, .. 
}) => { + self.collect_dep_cols(cond); + self.collect_dep_cols(filt); + } + _ => (), + }, + OpKind::Aggr(aggr) => self.collect_dep_cols(&aggr.filt), + OpKind::Query(qid) => { + if let Some(subq) = self.qry_set.get(qid) { + let out_cols = subq.out_cols(); + if let Some(indexes) = self.dep_cols.qry.get(qid).cloned() { + for idx in indexes { + let e = &out_cols[idx.value() as usize].expr; + let mut cfd = CollectDepCols(self.dep_cols); + let _ = e.walk(&mut cfd); + } + } + } + } + _ => (), + } + ControlFlow::Continue(()) + } + + #[inline] + fn leave(&mut self, op: &mut Op) -> ControlFlow { + match &mut op.kind { + OpKind::Query(qid) => self.pullup_query(op.id, *qid), + OpKind::Join(join) => match join.as_mut() { + Join::Cross(children) => self.pullup_cross_join(op.id, children), + Join::Qualified(QualifiedJoin { + kind, + left, + right, + cond, + filt, + }) => match kind { + JoinKind::Inner => self.pullup_inner_join( + op.id, + left.id, + right.id, + mem::take(cond), + mem::take(filt), + ), + JoinKind::Left => self.pullup_left_join( + op.id, + left.id, + right, + mem::take(cond), + mem::take(filt), + ), + JoinKind::Full => { + self.pullup_full_join(op.id, mem::take(cond), mem::take(filt)) + } + _ => todo!("unexpected join type: {:?}", kind), + }, + }, + OpKind::Filt { pred, input } => self.pullup_filt(op.id, input.id, mem::take(pred)), + // Scan is the only operator in a query with location equals to disk. + // The original plan won't have predicates on scan. 
+ OpKind::Scan(scan) => { + self.pullup_scan(op.id, scan.table_id, &scan.cols, mem::take(&mut scan.filt)) + } + OpKind::Row(_) => ControlFlow::Continue(()), + OpKind::Setop(setop) => match setop.kind { + SetopKind::Union => { + self.pullup_union(op.id, setop.left.id, setop.right.id, &setop.cols) + } + SetopKind::Intersect => { + self.pullup_intersect(op.id, setop.left.id, setop.right.id, &setop.cols) + } + SetopKind::Except => self.pullup_except(op.id, setop.left.id, &setop.cols), + }, + OpKind::Aggr(aggr) => self.pullup_aggr( + op.id, + aggr.input.id, + &aggr.groups, + &aggr.proj, + mem::take(&mut aggr.filt), + ), + OpKind::Proj { input, .. } | OpKind::Sort { input, limit: None, .. } => { + self.pullup_base(op.id, input.id) + } + OpKind::Limit { input, .. } | OpKind::Sort {input, limit: Some(_), ..} => self.pullup_limit(op.id, input.id), + OpKind::JoinGraph(_) => unreachable!(), + OpKind::Attach(..) | OpKind::Empty => todo!(), + } + } +} + +struct ReplaceCols<'a>(&'a HashMap); + +impl ExprMutVisitor for ReplaceCols<'_> { + type Cont = (); + type Break = (); + + #[inline] + fn leave(&mut self, e: &mut ExprKind) -> ControlFlow<(), ()> { + match e { + ExprKind::Col(c @ Col { .. }) => { + if let Some(new_col) = self.0.get(&c.gid) { + *c = new_col.clone(); + } else { + // cannot find column to replace + return ControlFlow::Break(()); + } + } + ExprKind::Aggf { .. } => return ControlFlow::Break(()), + _ => (), + } + ControlFlow::Continue(()) + } +} + +#[inline] +fn update_cols_inplace(e: &mut ExprKind, col_out: &HashMap) { + let mut rc = ReplaceCols(col_out); + assert!(e.walk_mut(&mut rc).is_continue()); +} + +struct ReplaceCol<'a>(GlobalID, &'a ExprKind); + +impl ExprMutVisitor for ReplaceCol<'_> { + type Cont = (); + type Break = (); + + #[inline] + fn leave(&mut self, e: &mut ExprKind) -> ControlFlow<(), ()> { + match e { + ExprKind::Col(Col { gid, .. 
}) => { + if *gid == self.0 { + *e = self.1.clone(); + } + } + _ => (), + } + ControlFlow::Continue(()) + } +} + +#[inline] +fn update_col_inplace(e: &mut ExprKind, gid: GlobalID, expr: &ExprKind) { + let mut rc: ReplaceCol<'_> = ReplaceCol(gid, expr); + let _ = e.walk_mut(&mut rc); +} + +struct IncludeCol(GlobalID); + +impl ExprVisitor<'_> for IncludeCol { + type Cont = (); + type Break = (); + #[inline] + fn leave(&mut self, e: &ExprKind) -> ControlFlow<(), ()> { + if let ExprKind::Col(Col { gid, .. }) = e { + if *gid == self.0 { + return ControlFlow::Break(()); + } + } + ControlFlow::Continue(()) + } +} + +#[inline] +fn include_col(e: &ExprKind, gid: GlobalID) -> bool { + let mut ic = IncludeCol(gid); + e.walk(&mut ic).is_break() +} + +// Rewrite predicate expressions to query output +// e.g.1. SELECT * FROM (SELECT c1+1 as c2 FROM t1 WHERE c1+1 > 0) t +// the top predicate should be c2 > 0. +// e.g.2. SELECT * FROM (SELECT c1 FROM t1 WHERE c1+1 > 0) t +// the top predicate should be c1+1 > 0. +struct RewriteExprOut<'a> { + e2c: &'a HashMap, + c2c: &'a HashMap, + expr_replaced: bool, +} + +impl ExprMutVisitor for RewriteExprOut<'_> { + type Cont = (); + type Break = (); + + #[inline] + fn enter(&mut self, e: &mut ExprKind) -> ControlFlow<(), ()> { + if let Some(c) = self.e2c.get(e) { + *e = ExprKind::Col(c.clone()); + self.expr_replaced = true; + } + ControlFlow::Continue(()) + } + + #[inline] + fn leave(&mut self, e: &mut ExprKind) -> ControlFlow<(), ()> { + if self.expr_replaced { + // just replaced with column, do not check it + self.expr_replaced = false; + return ControlFlow::Continue(()); + } + match e { + ExprKind::Col(c @ Col { .. }) => { + if let Some(new_col) = self.c2c.get(&c.gid) { + *c = new_col.clone(); + } else { + // cannot find column to replace + return ControlFlow::Break(()); + } + } + // predicates contain aggregation function and cannot + // be translated by expression output, fail. + ExprKind::Aggf { .. 
} => return ControlFlow::Break(()), + _ => (), + } + ControlFlow::Continue(()) + } +} + +/// Rewrite predicate expressions to query input +pub(super) struct RewriteExprIn<'a>(pub(super) &'a HashMap); + +impl ExprMutVisitor for RewriteExprIn<'_> { + type Cont = (); + type Break = (); + + #[inline] + fn leave(&mut self, e: &mut ExprKind) -> ControlFlow<(), ()> { + if let ExprKind::Col(Col{gid, ..}) = e { + if let Some(new_e) = self.0.get(gid) { + *e = new_e.clone(); + } + } + ControlFlow::Continue(()) + } +} + +struct AllColsIncluded<'a>(&'a HashSet); + +impl<'a> ExprVisitor<'a> for AllColsIncluded<'a> { + type Cont = (); + type Break = (); + + #[inline] + fn leave(&mut self, e: &ExprKind) -> ControlFlow<(), ()> { + match e { + ExprKind::Col(Col { gid, .. }) => { + if !self.0.contains(gid) { + return ControlFlow::Break(()); + } + } + _ => (), + } + ControlFlow::Continue(()) + } +} + +pub(super) struct CollectColMapping<'a> { + pub(super) qry_col_map: &'a mut HashMap, + pub(super) tbl_col_map: &'a mut HashMap>, + pub(super) col_map: &'a mut HashMap, +} + +impl<'a> ExprVisitor<'a> for CollectColMapping<'a> { + type Cont = (); + type Break = (); + #[inline] + fn leave(&mut self, e: &ExprKind) -> ControlFlow<(), ()> { + match e { + ExprKind::Col(c @ Col { gid, idx, kind }) => { + self.col_map.insert(*gid, c.clone()); + match kind { + ColKind::Query(qid) | ColKind::Correlated(qid) => { + self.qry_col_map.insert(QryCol(*qid, *idx), *gid); + } + ColKind::Table(table_id, ..) => { + self.tbl_col_map.entry(TblCol(*table_id, *idx)).or_default().insert(*gid); + } + _ => (), + } + } + _ => (), + } + ControlFlow::Continue(()) + } +} + +struct CheckSinglePlainCol(Option); + +impl ExprVisitor<'_> for CheckSinglePlainCol { + type Cont = (); + type Break = (); + #[inline] + fn leave(&mut self, e: &ExprKind) -> ControlFlow<(), ()> { + match e { + ExprKind::Col(Col { gid, .. 
}) => { + if let Some(g) = &mut self.0 { + if g == gid { + return ControlFlow::Continue(()); + } + ControlFlow::Break(()) + } else { + self.0 = Some(*gid); + ControlFlow::Continue(()) + } + } + ExprKind::Aggf { .. } => ControlFlow::Break(()), + _ => ControlFlow::Continue(()), + } + } +} + +struct CollectDepCols<'a>(&'a mut DepCols); + +impl<'a> ExprVisitor<'a> for CollectDepCols<'a> { + type Cont = (); + type Break = (); + #[inline] + fn leave(&mut self, e: &ExprKind) -> ControlFlow<(), ()> { + match e { + ExprKind::Col(Col { idx, kind, .. }) => match kind { + ColKind::Query(qid) | ColKind::Correlated(qid) => { + self.0.qry.entry(*qid).or_default().insert(*idx); + } + ColKind::Table(table_id, ..) => { + self.0.tbl.entry(*table_id).or_default().insert(*idx); + } + _ => (), + }, + _ => (), + } + ControlFlow::Continue(()) + } +} + +struct ClearFilt<'a>(&'a mut QuerySet); + +impl OpMutVisitor for ClearFilt<'_> { + type Cont = (); + type Break = Error; + #[inline] + fn leave(&mut self, op: &mut Op) -> ControlFlow { + match &mut op.kind { + OpKind::Filt { pred, input } => { + if pred.is_empty() { + *op = mem::take(&mut *input); + } + } + OpKind::Query(qid) => { + clear_filt(self.0, *qid).branch()?; + } + _ => (), + } + ControlFlow::Continue(()) + } +} + +#[inline] +fn collect_group_out_cols(group: &[ExprKind], proj: &[ProjCol]) -> HashSet { + let mut in_group = HashSet::new(); + for g in group { + if let ExprKind::Col(Col { gid, .. }) = g { + in_group.insert(*gid); + } + } + let mut res = HashSet::new(); + for p in proj { + if let ExprKind::Col(Col { gid, .. 
}) = &p.expr { + if in_group.contains(gid) { + res.insert(*gid); + } + } + } + res +} + +struct FindFnlDep(HashSet); + +impl ExprVisitor<'_> for FindFnlDep { + type Cont = (); + type Break = (); + #[inline] + fn leave(&mut self, e: &ExprKind) -> ControlFlow<(), ()> { + if let ExprKind::FnlDep(dep) = e { + self.0.insert(dep.clone()); + } + ControlFlow::Continue(()) + } +} + +#[inline] +fn setop_out_map(cols: &[ProjCol]) -> (HashMap, HashMap) { + let mut l_map = HashMap::with_capacity(cols.len()); + let mut r_map = HashMap::with_capacity(cols.len()); + for pc in cols { + if let ExprKind::Col( + c @ Col { + kind: ColKind::Setop(args), + .. + }, + ) = &pc.expr + { + l_map.insert(args[0].col_gid().unwrap(), c.clone()); + r_map.insert(args[1].col_gid().unwrap(), c.clone()); + } + } + (l_map, r_map) +} + +#[derive(Default)] +struct DepCols { + qry: HashMap>, + tbl: HashMap>, +} + + + + +#[cfg(test)] +mod tests { + use super::*; + use crate::lgc::tests::{assert_j_plan1, j_catalog, print_plan}; + use crate::lgc::OpVisitor; + use doradb_catalog::Catalog; + + #[test] + fn test_pred_pullup_single_table() { + let cat = j_catalog(); + assert_inner_set(&cat, "select c1 from t1", |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }); + assert_inner_set(&cat, "select * from (select c1 from t1) x1", |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }); + assert_inner_set( + &cat, + "select c1 from (select c1 from t1 where c1 = 0) x1", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert_eq!(inner.filt.len(), 1); + }, + ); + assert_inner_set(&cat, "select c1 from t1 where t1.c1 = t1.c0", |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 2); + assert!(inner.filt.is_empty()); + }); + assert_inner_set( + &cat, + "select c1 from t1 where t1.c1 = t1.c0 and t1.c1 = 0", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 2); + assert!(inner.filt.len() == 1); + }, + ); + 
assert_inner_set( + &cat, + "select * from (select c1 from t1 where c0 + c1 > 0) t", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select * from (select c1, c0 from t1 where c0 + c1 > 0) t", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + }, + ); + assert_inner_set( + &cat, + "select * from (select c0, 1 as c1 from t1) t", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); // project a constant is converted to a equal predicate + }, + ); + assert_inner_set( + &cat, + "select * from (select c0, c0+1 as c1 from t1) t", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + assert!(inner.total_fnl_deps() == 0); // ignore projection dependency. + }, + ); + assert_inner_set( + &cat, + "select * from (select c0, c1 from t1 where c1 > 0) t", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + assert!(inner.total_fnl_deps() == 1); // pk dependency. + }, + ); + assert_inner_set( + &cat, + "select * from (select c0, c1 from t3 where c1 > 0) t", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + assert!(inner.total_fnl_deps() == 0); // filter contains pk. + }, + ); + assert_inner_set( + &cat, + "select * from (select c0, c2 from t3 where c2 > 0) t", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + assert!(inner.total_fnl_deps() == 0); // part of pk not exported. 
+ }, + ); + assert_inner_set( + &cat, + "select * from (select c0, c1, c2 from t4 where c2 > 0) t", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + assert!(inner.total_fnl_deps() == 2); // pk and uk + }, + ); + assert_inner_set( + &cat, + "select * from (select c0 from t2 where c2 > 0) t", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); // table dependency enables predicate conversion + assert!(inner.total_fnl_deps() == 0); + }, + ); + assert_inner_set( + &cat, + "select * from (select c1 from t2 where c2 > 0) t", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); // no key exported, dependency and predicates is dropped + assert!(inner.total_fnl_deps() == 0); + }, + ); + assert_inner_set( + &cat, + "select * from (select c1+1 as c3 from t2 where c1+1 > 0) t", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); // expression rewrite + assert!(inner.total_fnl_deps() == 0); + }, + ); + assert_inner_set( + &cat, + "select * from (select c1, c1+1 as c2 from t2 where c1+1 > 0) t", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 2); // predicates of both c1 and c2 are kept. 
+ assert!(inner.total_fnl_deps() == 0); + }, + ); + } + + #[test] + fn test_pred_pullup_cross_join() { + let cat = j_catalog(); + assert_inner_set(&cat, "select t1.c1 from t1, t2", |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }); + assert_inner_set( + &cat, + "select t1.c1 from t1, t2 where t1.c1 = t2.c2", + |inner| { + assert!(inner.eq_sets.len() == 1); + assert!(inner.eq_sets[0].len() == 2); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select t1.c1 from (select c1 from t1 where c1 = 0) t1, t2", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + }, + ); + assert_inner_set( + &cat, + "select t1.c1 from (select c1 from t1 where c1 = 0) t1, (select c2 from t2 where c2 = 0) t2", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + }, + ); + assert_inner_set( + &cat, + "select t1.c1, t2.c2 from (select c1 from t1 where c1 = 0) t1, (select c2 from t2 where c2 = 0) t2", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 2); + }, + ); + assert_inner_set( + &cat, + "select t1.c1, t2.c2 from (select c1 from t1 where c1 = 0) t1, (select c2 from t2 where c2 = 0) t2", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 2); + }, + ); + assert_inner_set( + &cat, + "select t1.c1, t2.c2 from (select c1 from t1 where c1 = 0) t1, t2 where t1.c1 = t2.c2", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 2); + assert!(inner.filt.len() == 1); + }, + ); + assert_inner_set( + &cat, + "select * from (select c0, c1 from t1 where c0 = c1) t1, t2 where t1.c1 = t2.c2", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 3); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select * from (select c0, c1 from t1 where c0 = c1) t1, (select c2, c3 from t3 where c2 = c3) t3", + |inner| { + assert!(inner.eq_sets.len() == 2 && 
inner.eq_sets[0].len() == 2 && inner.eq_sets[1].len() == 2); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select * from (select c0, c1 from t1 where c0 = c1) t1, (select c2, c3 from t3 where c2 = c3) t3 where t1.c1 = t3.c3", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 4); + assert!(inner.filt.is_empty()); + }, + ); + } + + #[test] + fn test_pred_pullup_outer_join() { + let cat = j_catalog(); + assert_inner_outer( + &cat, + "select t1.c1, t2.c2 from (select c1 from t1 where c1 > 0) t1 left join t2 on t1.c1 = t2.c2", + |inner, outer| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + assert!(outer.eq_map.len() == 1); + assert!(outer.cond.is_empty()); + } + ); + assert_inner_outer( + &cat, + "select t1.c1, t2.c2 from (select c1 from t1 where c1 > 0) t1 right join t2 on t1.c1 = t2.c2", + |inner, outer| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + assert!(outer.eq_map.len() == 1); + assert!(outer.cond.is_empty()); + } + ); + assert_inner_outer( + &cat, + "select t1.c1, t2.c2 from t1 left join (select c2 from t2 where c2 > 0) t2 on t1.c1 = t2.c2", + |inner, outer| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + assert!(outer.eq_map.len() == 1); + assert!(outer.cond.is_empty()); + } + ); + assert_inner_outer( + &cat, + "select t1.c0, t1.c1, t2.c2 from (select c0, c1 from t1 where c0 = c1) t1 left join t2 on t1.c1 = t2.c2", + |inner, outer| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 2); + assert!(inner.filt.is_empty()); + assert!(outer.eq_map.len() == 1); + assert!(outer.cond.is_empty()); + } + ); + assert_inner_outer( + &cat, + "select t1.c1, t2.c0, t2.c2 from t1 left join (select c0, c2 from t2 where c0 = c2) t2 on t1.c1 = t2.c2", + |inner, outer| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + assert!(outer.eq_map.len() == 1); + assert!(outer.cond.is_empty()); + } + ); + 
assert_inner_outer( + &cat, + "select t1.c1, t2.c2 from (select c1 from t1 where c1 > 0) t1 full join t2 on t1.c1 = t2.c2", + |inner, outer| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + assert!(outer.eq_map.is_empty()); + assert!(outer.cond.len() == 1); + } + ); + assert_inner_outer( + &cat, + "select t1.c1, t2.c2 from t1 full join (select c2 from t2 where c2 > 0) t2 on t1.c1 = t2.c2", + |inner, outer| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + assert!(outer.eq_map.is_empty()); + assert!(outer.cond.len() == 1); + } + ); + assert_inner_outer( + &cat, + "select t1.c0, t1.c1, t2.c2 from (select c0, c1 from t1 where c0 = c1) t1 full join t2 on t1.c1 = t2.c2", + |inner, outer| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + assert!(outer.eq_map.is_empty()); + assert!(outer.cond.len() == 1); + } + ); + assert_inner_outer( + &cat, + "select t1.c1, t2.c2 from t1 left join t2 on t1.c1 = t2.c2 and t2.c2 > 0", + |inner, outer| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + assert!(outer.eq_map.len() == 1); + assert!(outer.cond.len() == 1); + }, + ); + assert_inner_outer( + &cat, + "select t1.c1, t2.c2 from t1 full join t2 on t1.c1 = t2.c2 and t2.c2 > 0", + |inner, outer| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + assert!(outer.eq_map.is_empty()); + assert!(outer.cond.len() == 2); + }, + ); + assert_inner_outer( + &cat, + "select t1.c1, t2.c2 from t1 left join t2 on t1.c0 = t1.c1", + |inner, outer| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + assert!(outer.eq_map.is_empty()); + assert!(outer.cond.len() == 1); + }, + ); + assert_inner_outer( + &cat, + "select t1.c1, t2.c2 from t1 left join t2 on t2.c1 = t2.c2", + |inner, outer| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + assert!(outer.eq_map.is_empty()); + assert!(outer.cond.len() == 1); + }, + ); + assert_inner_outer( + 
&cat, + "select t1.c1, t2.c2 from t1 left join t2 on t2.c2 = t1.c1", + |inner, outer| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + assert!(outer.eq_map.len() == 1); + assert!(outer.cond.is_empty()); + }, + ); + } + + #[test] + fn test_pred_pullup_aggr() { + let cat = j_catalog(); + assert_inner_set(&cat, "select count(*) from t1", |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }); + assert_inner_set(&cat, "select count(*) from t3 where c1 > 0", |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }); + assert_inner_set( + &cat, + "select c1, count(*) from t3 where c1 > 0 group by c1", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + }, + ); + assert_inner_set( + &cat, + "select c1, count(*) from t3 where c2 > 0 group by c1", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, count(*) from t3 where c1 = c2 group by c1", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, count(*) from t3 where c1 = c2 group by c1, c2", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 2); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, count(*) from t3 group by c1 having count(*) > 1", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, count(*) from t3 where c1 + c2 > 0 group by c1, c2", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, count(*) from t3 where c1 + c2 > 0 group by c1, c2 having count(*) - 10 > 0", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 2); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, 
c3 from (select c1, c2, c3 from t3 where c1 = c2) t3 group by c1, c2, c3", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 2); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from (select c1, c2, c3 from t3 where c1 = c2) t3 group by c1, c2, c3 having c2 = c3", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 3); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, count(c3) from (select * from t3 where c3 > 1) t3 group by c1, c2", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select * from (select c1, c2, count(c1)+1 from t3 group by c1, c2) t", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + assert!(inner.fnl_deps.is_empty()); + }, + ); + } + + #[test] + fn test_pred_pullup_union() { + let cat = j_catalog(); + assert_inner_set( + &cat, + "select c1, c2 from t3 union select c1, c2 from t4", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2 from t3 where c1 = c2 union select c1, c2 from t4", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2 from t3 union select c1, c2 from t4 where c1 = c2", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2 from t3 where c1 = c2 union select c1, c2 from t4 where c1 = c2", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 2); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 = c2 and c1 = c3 union select c1, c2, c3 from t4 where c1 = c2", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 2); + 
assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 = c2 union select c1, c2, c3 from t4 where c1 = c2 and c1 = c3", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 2); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 = c2 and c1 = c3 union select c1, c2, c0 from t4 where c0 = c2 and c2 = c1", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 3); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 = c2 union select c1, c2, c0 from t4 where c0 = c2", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 > 3 union select c0, c2, c1 from t4", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 union select c0, c2, c1 from t4 where c0 > 0", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 > 3 union select c0, c2, c1 from t4 where c0 > 0", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 > 3 union select c0, c2, c1 from t4 where c0 > 3", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 > 3 and c1 < 10 union select c0, c2, c1 from t4 where c0 > 3 and c2 < 10", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 > 3 and c1 < 10 union select c0, c2, c1 from t4 where c0 > 4 and c2 < 10", + |inner| { + 
assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }, + ); + } + + #[test] + fn test_pred_pullup_except() { + let cat = j_catalog(); + assert_inner_set( + &cat, + "select c1, c2 from t3 except select c1, c2 from t4", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2 from t3 where c1 = c2 except select c1, c2 from t4", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 2); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2 from t3 except select c1, c2 from t4 where c1 = c2", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2 from t3 where c1 = c2 except select c1, c2 from t4 where c1 = c2", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 2); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 = c2 and c1 = c3 except select c1, c2, c3 from t4 where c1 = c2", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 3); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 = c2 except select c1, c2, c3 from t4 where c1 = c2 and c1 = c3", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 2); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 = c2 and c1 = c3 except select c1, c2, c0 from t4 where c0 = c2 and c2 = c1", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 3); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 = c2 except select c1, c2, c0 from t4 where c0 = c2", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 2); + assert!(inner.filt.is_empty()); + }, + 
); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 > 3 except select c0, c2, c1 from t4", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 except select c0, c2, c1 from t4 where c0 > 0", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 > 3 except select c0, c2, c1 from t4 where c0 > 0", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 > 3 except select c0, c2, c1 from t4 where c0 > 3", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 > 3 and c1 < 10 except select c0, c2, c1 from t4 where c0 > 3 and c2 < 10", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 2); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 > 3 and c1 < 10 except select c0, c2, c1 from t4 where c0 > 4 and c2 < 10", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 2); + }, + ); + } + + #[test] + fn test_pred_pullup_intersect() { + let cat = j_catalog(); + assert_inner_set( + &cat, + "select c1, c2 from t3 intersect select c1, c2 from t4", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2 from t3 where c1 = c2 intersect select c1, c2 from t4", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 2); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2 from t3 intersect select c1, c2 from t4 where c1 = c2", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 2); + assert!(inner.filt.is_empty()); + }, + ); + 
assert_inner_set( + &cat, + "select c1, c2 from t3 where c1 = c2 intersect select c1, c2 from t4 where c1 = c2", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 2); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 = c2 and c1 = c3 intersect select c1, c2, c3 from t4 where c1 = c2", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 3); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 = c2 intersect select c1, c2, c3 from t4 where c1 = c2 and c1 = c3", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 3); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 = c2 and c1 = c3 intersect select c1, c2, c0 from t4 where c0 = c2 and c2 = c1", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 3); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 = c2 intersect select c1, c2, c0 from t4 where c0 = c2", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 3); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 > 3 intersect select c0, c2, c1 from t4", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 intersect select c0, c2, c1 from t4 where c0 > 0", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 > 3 intersect select c0, c2, c1 from t4 where c0 > 0", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 2); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 > 3 intersect select c0, c2, c1 from t4 
where c0 > 3", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 1); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 > 3 and c1 < 10 intersect select c0, c2, c1 from t4 where c0 > 3 and c2 < 10", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 3); + }, + ); + assert_inner_set( + &cat, + "select c1, c2, c3 from t3 where c1 > 3 and c1 < 10 intersect select c0, c2, c1 from t4 where c0 > 4 and c2 < 10", + |inner| { + assert!(inner.eq_sets.is_empty()); + assert!(inner.filt.len() == 4); + }, + ); + } + + #[test] + fn test_pred_pullup_multi_tables() { + let cat = j_catalog(); + assert_inner_set( + &cat, + "select t1.c1, t2.c2 from t1, t2, t3 where t1.c1 = t2.c2 and t2.c2 = t3.c3", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 3); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select t1.c1, t2.c2 from t1, t2, t3 where t1.c1 = t2.c2 and t2.c2 = t3.c3 and t3.c3 > 0", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 3); + assert!(inner.filt.len() == 1); + } + ); + assert_inner_set( + &cat, + "select t1.c1, t2.c0 from t1 join t2 on t1.c1 = t2.c2 join t3 on t1.c1 = t3.c3", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 3); + assert!(inner.filt.is_empty()); + }, + ); + assert_inner_set( + &cat, + "select t1.c1, t2.c0 from t1 join t2 on t1.c1 = t2.c2 join t3 on t1.c1 = t3.c3 where t3.c3 > 0", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 3); + assert!(inner.filt.len() == 1); + } + ); + assert_inner_set( + &cat, + "select t1.c1, t2.c0 from t1 join t2 on t1.c1 = t2.c2 join (select * from t3 where c3 > 0) t3 on t1.c1 = t3.c3", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 3); + assert!(inner.filt.len() == 1); + } + ); + assert_inner_set( + &cat, + "select t1.c1, t2.c0 from t1 join (select * from t2 where c1 > 1) t2 on t1.c1 = 
t2.c2 join (select * from t3 where c3 > 3) t3 on t1.c1 = t3.c3", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 3); + assert!(inner.filt.len() == 2); + assert!(inner.total_fnl_deps() == 1); + } + ); + assert_inner_set( + &cat, + "select t1.c1, t2.c0 from t1 join (select * from t2 where c0 > 0) t2 on t1.c1 = t2.c2 join (select * from t3 where c3 > 0) t3 on t1.c1 = t3.c3", + |inner| { + assert!(inner.eq_sets.len() == 1 && inner.eq_sets[0].len() == 3); + assert!(inner.filt.len() == 2); + } + ); + } + + fn assert_inner_set(cat: &C, sql: &str, f: F) { + assert_j_plan1(cat, sql, |s1, mut q1| { + assign_id(&mut q1.qry_set, q1.root).unwrap(); + print_plan(s1, &q1); + let mut pm = PredMap::default(); + let mut dep_cols = DepCols::default(); + pred_pullup(cat, &mut q1.qry_set, q1.root, &mut pm, &mut dep_cols).unwrap(); + print_plan(s1, &q1); + let subq = q1.qry_set.get(&q1.root).unwrap(); + let inner_set = pm.inner.get(&subq.root.id).unwrap(); + f(inner_set); + }) + } + + fn assert_inner_outer(cat: &C, sql: &str, f: F) { + assert_j_plan1(cat, sql, |s1, mut q1| { + assign_id(&mut q1.qry_set, q1.root).unwrap(); + let mut pm = PredMap::default(); + let mut dep_cols = DepCols::default(); + pred_pullup(cat, &mut q1.qry_set, q1.root, &mut pm, &mut dep_cols).unwrap(); + // clear_filt(&mut q1.qry_set, q1.root).unwrap(); + print_plan(s1, &q1); + // let subq = q1.qry_set.get(&q1.root).unwrap(); + let subq = q1.qry_set.get(&q1.root).unwrap(); + let inner_set = pm.inner.get(&subq.root.id).unwrap(); + let outer_id = find_first_outer_join_id(&q1.qry_set, q1.root).unwrap(); + let outer_set = pm.outer.get(&outer_id).unwrap(); + f(inner_set, outer_set); + }) + } + + fn find_first_outer_join_id(qry_set: &QuerySet, root: QueryID) -> Option { + struct FindFirstOuterJoinID<'a>(&'a QuerySet); + impl OpVisitor for FindFirstOuterJoinID<'_> { + type Cont = (); + type Break = GlobalID; + #[inline] + fn enter(&mut self, op: &Op) -> ControlFlow { + match &op.kind { + 
OpKind::Query(qid) => { + if let Some(gid) = find_first_outer_join_id(self.0, *qid) { + return ControlFlow::Break(gid); + } + ControlFlow::Continue(()) + } + OpKind::Join(join) => match join.as_ref() { + Join::Qualified(QualifiedJoin { + kind: JoinKind::Left | JoinKind::Full, + .. + }) => { + return ControlFlow::Break(op.id); + } + _ => ControlFlow::Continue(()), + }, + _ => ControlFlow::Continue(()), + } + } + } + let mut f = FindFirstOuterJoinID(qry_set); + if let Some(subq) = qry_set.get(&root) { + if let ControlFlow::Break(gid) = subq.root.walk(&mut f) { + return Some(gid); + } + } + None + } +} diff --git a/xngin-plan/src/rule/pred_pullup.rs b/doradb-plan/src/rule/pred_pullup_old.rs similarity index 92% rename from xngin-plan/src/rule/pred_pullup.rs rename to doradb-plan/src/rule/pred_pullup_old.rs index cc21292..9857f6d 100644 --- a/xngin-plan/src/rule/pred_pullup.rs +++ b/doradb-plan/src/rule/pred_pullup_old.rs @@ -4,10 +4,10 @@ use crate::lgc::{Op, OpKind, OpMutVisitor, ProjCol, QuerySet}; use crate::rule::expr_simplify::{update_simplify_nested, NullCoalesce, PartialExpr}; use std::collections::{HashMap, HashSet}; use std::mem; -use xngin_expr::controlflow::{Branch, ControlFlow, Unbranch}; -use xngin_expr::{ - Col, ColIndex, ColKind, ExprKind, ExprVisitor, FuncKind, GlobalID, Pred, PredFuncKind, - QueryCol, QueryID, +use doradb_expr::controlflow::{Branch, ControlFlow, Unbranch}; +use doradb_expr::{ + Col, ColIndex, ColKind, ExprKind, ExprExt, ExprVisitor, FuncKind, GlobalID, Pred, PredFuncKind, + QryCol, QueryID, }; /// Pullup predicates. 
@@ -37,8 +37,8 @@ pub fn pred_pullup(qry_set: &mut QuerySet, qry_id: QueryID) -> Result<()> { fn pullup_pred( qry_set: &mut QuerySet, qry_id: QueryID, - p_cols: HashMap, - p_preds: &mut HashMap, + p_cols: HashMap, + p_preds: &mut HashMap, ) -> Result<()> { qry_set.transform_op(qry_id, |qry_set, _, op| { let mut ppu = PredPullup::new(qry_set, p_cols, p_preds); @@ -50,18 +50,18 @@ fn pullup_pred( struct PredPullup<'a> { qry_set: &'a mut QuerySet, // parent columns that predicates target - p_cols: HashMap, + p_cols: HashMap, // predicates converted based on mapping, this field is passed by // parent oprator - p_preds: &'a mut HashMap, + p_preds: &'a mut HashMap, // mapping current cols to parent cols - mapping: HashMap, + mapping: HashMap, // whether the parent columns have been translated. translated: bool, // current columns involved in current predicates. - c_cols: HashMap, + c_cols: HashMap, // current preds, that will be passed to child query. - c_preds: HashMap, + c_preds: HashMap, // store join op that are temporarily removed, in order to restore back stack: Vec, } @@ -70,8 +70,8 @@ impl<'a> PredPullup<'a> { #[inline] fn new( qry_set: &'a mut QuerySet, - p_cols: HashMap, - p_preds: &'a mut HashMap, + p_cols: HashMap, + p_preds: &'a mut HashMap, ) -> Self { PredPullup { qry_set, @@ -100,7 +100,7 @@ impl<'a> PredPullup<'a> { // translate at most once for each query block return; } - for ((p_qid, p_idx), p_gid) in &self.p_cols { + for (QryCol(p_qid, p_idx), p_gid) in &self.p_cols { let c = &out_cols[p_idx.value() as usize]; if let Some((c_col, new_e)) = translate_col(*p_gid, *p_qid, *p_idx, &c.expr) { self.mapping.insert(c_col, new_e); @@ -118,7 +118,7 @@ impl<'a> PredPullup<'a> { if let Some((gid, qid, idx, e)) = translate_pred(p, &self.mapping)? 
{ let peset = self .p_preds - .entry((qid, idx)) + .entry(QryCol(qid, idx)) .or_insert_with(|| PartialExprSet(gid, HashSet::new())); peset.1.insert(e); } @@ -166,12 +166,14 @@ impl<'a> PredPullup<'a> { })], ) = (kind, &args[..]) { - if let Some(PartialExprSet(_, pes)) = self.c_preds.get(&(*l_qid, *l_idx)) { + let l_qry_col = QryCol(*l_qid, *l_idx); + if let Some(PartialExprSet(_, pes)) = self.c_preds.get(&l_qry_col) { for pe in pes { res.push((*r_gid, *r_qid, *r_idx, pe.clone())); } } - if let Some(PartialExprSet(_, pes)) = self.c_preds.get(&(*r_qid, *r_idx)) { + let r_qry_col = QryCol(*r_qid, *r_idx); + if let Some(PartialExprSet(_, pes)) = self.c_preds.get(&r_qry_col) { for pe in pes { res.push((*l_gid, *l_qid, *l_idx, pe.clone())); } @@ -191,7 +193,7 @@ impl OpMutVisitor for PredPullup<'_> { match &mut op.kind { // top down, translate parent cols to current cols, for first proj or aggr OpKind::Proj { cols, .. } => { - self.translate_p_cols(cols.as_ref().unwrap()); + self.translate_p_cols(cols); } // top down , translate parent cols OpKind::Aggr(aggr) => { @@ -249,9 +251,9 @@ impl OpMutVisitor for PredPullup<'_> { let p_cols: HashMap<_, _> = self .c_cols .iter() - .filter_map(|((qid, idx), gid)| { + .filter_map(|(QryCol(qid, idx), gid)| { if qid == qry_id { - Some(((*qid, *idx), *gid)) + Some((QryCol(*qid, *idx), *gid)) } else { None } @@ -279,7 +281,7 @@ impl OpMutVisitor for PredPullup<'_> { OpKind::Proj { input, .. 
} => { if !self.c_preds.is_empty() { let mut pred = vec![]; - for ((qid, idx), PartialExprSet(gid, pes)) in self.c_preds.drain() { + for (QryCol(qid, idx), PartialExprSet(gid, pes)) in self.c_preds.drain() { for pe in pes { let new_e = ExprKind::pred_func( pe.kind, @@ -298,7 +300,7 @@ impl OpMutVisitor for PredPullup<'_> { } OpKind::Aggr(aggr) => { if !self.c_preds.is_empty() { - for ((qid, idx), PartialExprSet(gid, pes)) in self.c_preds.drain() { + for (QryCol(qid, idx), PartialExprSet(gid, pes)) in self.c_preds.drain() { for pe in pes { let new_e = ExprKind::pred_func( pe.kind, @@ -316,7 +318,7 @@ impl OpMutVisitor for PredPullup<'_> { OpKind::Filt { pred, .. } => { if !pred.is_empty() && !self.c_preds.is_empty() { let new_preds = self.propagate_preds(pred); - for ((qid, idx), PartialExprSet(gid, pes)) in self.c_preds.drain() { + for (QryCol(qid, idx), PartialExprSet(gid, pes)) in self.c_preds.drain() { for pe in pes { let new_e = ExprKind::pred_func( pe.kind, @@ -358,7 +360,7 @@ impl OpMutVisitor for PredPullup<'_> { for (gid, qid, idx, pe) in new_preds { let peset = self .c_preds - .entry((qid, idx)) + .entry(QryCol(qid, idx)) .or_insert_with(|| PartialExprSet(gid, HashSet::new())); peset.1.insert(pe); } @@ -385,7 +387,7 @@ impl OpMutVisitor for PredPullup<'_> { // all others belong to left side and can be pulled up let peset = self .c_preds - .entry((qid, idx)) + .entry(QryCol(qid, idx)) .or_insert_with(|| PartialExprSet(gid, HashSet::new())); peset.1.insert(pe); } @@ -418,11 +420,11 @@ impl OpMutVisitor for PredPullup<'_> { /// collect query columns in expression. 
#[inline] -fn collect_non_aggr_qry_cols(e: &ExprKind, hs: &mut HashMap) { +fn collect_non_aggr_qry_cols(e: &ExprKind, hs: &mut HashMap) { let mut c = CollectQryCols(hs); let _ = e.walk(&mut c); } -struct CollectQryCols<'a>(&'a mut HashMap); +struct CollectQryCols<'a>(&'a mut HashMap); impl<'a> ExprVisitor<'a> for CollectQryCols<'_> { type Cont = (); @@ -435,7 +437,8 @@ impl<'a> ExprVisitor<'a> for CollectQryCols<'_> { kind: ColKind::Query(qry_id), idx, }) => { - self.0.insert((*qry_id, *idx), *gid); + let qry_col = QryCol(*qry_id, *idx); + self.0.insert(qry_col, *gid); ControlFlow::Continue(()) } ExprKind::Aggf { .. } => ControlFlow::Break(()), @@ -452,7 +455,7 @@ fn translate_col( p_qid: QueryID, p_idx: ColIndex, e: &ExprKind, -) -> Option<(QueryCol, ExprKind)> { +) -> Option<(QryCol, ExprKind)> { let res = match e { ExprKind::Col(Col { kind: ColKind::Query(c_qid), @@ -461,7 +464,7 @@ fn translate_col( }) => { // direct mapping between columns: c_col -> p_col let new_e = ExprKind::query_col(p_gid, p_qid, p_idx); - ((*c_qid, *c_idx), new_e) + (QryCol(*c_qid, *c_idx), new_e) } ExprKind::Func { kind, args, .. 
} => match (kind, &args[..]) { ( @@ -477,7 +480,7 @@ fn translate_col( FuncKind::Sub, vec![ExprKind::query_col(p_gid, p_qid, p_idx), c.clone()], ); - ((*c_qid, *c_idx), new_e) + (QryCol(*c_qid, *c_idx), new_e) } ( FuncKind::Sub, @@ -492,7 +495,7 @@ fn translate_col( FuncKind::Add, vec![ExprKind::query_col(p_gid, p_qid, p_idx), c.clone()], ); - ((*c_qid, *c_idx), new_e) + (QryCol(*c_qid, *c_idx), new_e) } ( FuncKind::Sub, @@ -507,7 +510,7 @@ fn translate_col( FuncKind::Sub, vec![c.clone(), ExprKind::query_col(p_gid, p_qid, p_idx)], ); - ((*c_qid, *c_idx), new_e) + (QryCol(*c_qid, *c_idx), new_e) } _ => return None, }, @@ -521,7 +524,7 @@ fn translate_col( #[inline] fn translate_pred( c_pred: &ExprKind, - mapping: &HashMap, + mapping: &HashMap, ) -> Result> { let mut new_p = c_pred.clone(); let res = update_simplify_nested(&mut new_p, NullCoalesce::False, |e| match e { @@ -530,7 +533,8 @@ fn translate_pred( idx, .. }) => { - if let Some(new_e) = mapping.get(&(*qry_id, *idx)) { + let qry_col = QryCol(*qry_id, *idx); + if let Some(new_e) = mapping.get(&qry_col) { *e = new_e.clone(); Ok(()) } else { diff --git a/doradb-plan/src/rule/pred_pushdown.rs b/doradb-plan/src/rule/pred_pushdown.rs new file mode 100644 index 0000000..fcfda98 --- /dev/null +++ b/doradb-plan/src/rule/pred_pushdown.rs @@ -0,0 +1,1565 @@ +use crate::error::{Error, Result}; +use crate::join::{Join, JoinKind, JoinOp, QualifiedJoin}; +use crate::lgc::{Op, OpKind, OpVisitor, ProjCol, QryIDs, QuerySet, Aggr}; +use crate::rule::expr_simplify::{simplify_nested, NullCoalesce}; +use crate::rule::pred_move::{InnerSet, PredMap, CollectColMapping, RewriteExprIn}; +use crate::rule::op_id::assign_id; +use std::collections::hash_map::Entry; +use std::collections::{HashMap, HashSet}; +use std::mem; +use std::hash::Hash; +use std::cell::RefCell; +use doradb_catalog::{TableID, TblCol}; +use doradb_expr::controlflow::{Branch, ControlFlow, Unbranch}; +use doradb_expr::fold::Fold; +use doradb_expr::{Col, ColKind, 
ColIndex, ExprKind, ExprExt, ExprVisitor, FnlDep, GlobalID, PredFuncKind, QueryID, QryCol}; + +#[inline] +pub fn pred_pushdown(qry_set: &mut QuerySet, qry_id: QueryID, col_id: &mut GlobalID) -> Result<()> { + let mut pred_map = PredMap::default(); + let inner = InnerSet::default(); + let mut max_op_id = assign_id(qry_set, qry_id)?; + collect_cols(qry_set, qry_id, &mut pred_map)?; + pushdown_pred(qry_set, qry_id, &mut pred_map, &mut max_op_id, col_id, inner)?; + Ok(()) +} + +#[inline] +fn collect_cols(qry_set: &mut QuerySet, qry_id: QueryID, pred_map: &mut PredMap) -> Result<()> { + qry_set.transform_op(qry_id, |qry_set, _, op| { + let mut cc = CollectCols { qry_set, pred_map}; + op.walk(&mut cc).unbranch() + })? +} + +#[inline] +fn pushdown_pred(qry_set: &mut QuerySet, qry_id: QueryID, pred_map: &mut PredMap, op_id: &mut GlobalID, col_id: &mut GlobalID, inner: InnerSet) -> Result<()> { + qry_set.transform_op(qry_id, |qry_set, _, op| { + let mut ppd = PredPushdown { qry_set, pred_map, op_id, col_id }; + ppd.push(op, inner) + })? +} + +struct PredPushdown<'a> { + qry_set: &'a mut QuerySet, + pred_map: &'a mut PredMap, + op_id: &'a mut GlobalID, + col_id: &'a mut GlobalID, +} + +impl PredPushdown<'_> { + + /// push inner set to node. + #[inline] + pub fn push(&mut self, op: &mut Op, mut inner: InnerSet) -> Result<()> { + inner = self.pred_map.extract_and_merge_inner(op.id, inner); + match &mut op.kind { + OpKind::Limit{input, ..} | OpKind::Sort{limit: Some(_), input, ..} => { + // predicates should not be pushed down to limit or sort operator, + // except it originates from downside. + let inner = self.push_retain_diff(input, inner)?; + // create new filter if some predicates remained above limit operator. + if !inner.is_empty() { + let filter = self.create_new_filt(mem::take(op), inner); + *op = filter; + } + Ok(()) + } + OpKind::Proj { input, .. 
} | OpKind::Sort{input, ..} | OpKind::Attach(input, _) => { + self.push_base(input, inner) + } + OpKind::Filt { input, pred } => { + if !pred.is_empty() { + for e in mem::take(pred) { + inner.handle_filt(e.into()); + } + } + self.push(input, inner)?; + let mut input = mem::take(input); + mem::swap(op, &mut *input); + Ok(()) + } + OpKind::Query(qry_id) => { + self.push_query(*qry_id, inner) + } + OpKind::Scan(scan) => { + self.apply_scan(scan.table_id, &scan.cols, &mut scan.filt, inner) + } + OpKind::Aggr(aggr) => { + if let Some(new_inner) = self.push_aggr(aggr, inner)? { + if !new_inner.is_empty() { + let orig_op = mem::take(op); + *op = self.create_new_filt(orig_op, new_inner); + } + } + Ok(()) + } + OpKind::Setop(so) => { + // push to both side and won't fail + let left = so.left.as_mut(); + let inner_copy = inner.clone(); + self.push(left, inner_copy)?; + let right = so.right.as_mut(); + self.push(right, inner)?; + Ok(()) + } + OpKind::Empty => Ok(()), + OpKind::Row(_) => todo!(), // todo: evaluate immediately + OpKind::JoinGraph(_) => unreachable!("Predicates pushdown to join graph is not supported"), + OpKind::Join(join) => todo!(), + } + } + + /// Push predicates down across LIMIT operator, we need to + /// retain the differences between current node and its child. + /// So there might be some expression that can not be pushed down. + /// the caller will create a filter upon the LIMIT operator. + #[inline] + fn push_retain_diff(&mut self, input: &mut Box, mut inner: InnerSet) -> Result { + // we should return the difference of input inner set and child's inner set. + if let Some(child_inner) = self.pred_map.get_inner(input.id) { + inner.retain_diff(child_inner); + } + // we should reject all predicates pushed + self.push(input.as_mut(), InnerSet::default())?; + Ok(inner) + } + + /// Push predicates to base operators such as PROJECTION and SORT. 
+ #[inline] + fn push_base(&mut self, input: &mut Box, inner: InnerSet) -> Result<()> { + self.push(&mut *input, inner)?; + Ok(()) + } + + /// Push predicates across AGGREGATE. + #[inline] + fn push_aggr(&mut self, aggr: &mut Box, mut inner: InnerSet) -> Result> { + if !aggr.filt.is_empty() { + for e in mem::take(&mut aggr.filt) { + inner.handle_filt(e.into()); + } + } + // equal set here can always be pushed to child, because + // columns must be in group items. + debug_assert!(inner.eq_sets.iter().all(|eq_set| eq_set_all_included_in_groups(eq_set, &aggr.groups))); + let mut new_inner = InnerSet::default(); + new_inner.eq_sets = mem::take(&mut inner.eq_sets); + if inner.filt.is_empty() { + self.push(&mut aggr.input, new_inner)?; + return Ok(None); + } + // handle filter + // expressions with aggregate functions can not be pushed down to child. + let (curr_filt, child_filt): (Vec<_>, Vec<_>) = inner.filt.into_iter().partition(|e| e.contains_aggr_func()); + inner.filt = curr_filt; + new_inner.filt = child_filt; + self.push(&mut aggr.input, new_inner)?; + if inner.is_empty() { + return Ok(None); + } + Ok(Some(inner)) + } + + // create new filter node and return its child + #[inline] + fn create_new_filt<'a>(&mut self, input: Op, new_inner: InnerSet) -> Op { + let orig_in = input; + let mut pred = vec![]; + self.pred_map.apply_inner(new_inner, &mut pred); + let filter = OpKind::Filt{pred, input: Box::new(orig_in)}; + let op_id = self.op_id.inc_fetch(); + Op{id: op_id, kind: filter} + } + + #[inline] + fn translate_query_inner_set(&mut self, qry_id: QueryID, inner: &InnerSet) -> InnerSet { + // collect mapping from output columns to input expressions + let in_map: HashMap = { + let subq = self.qry_set.get(&qry_id).unwrap(); + let out_cols = subq.out_cols(); + out_cols.iter() + .enumerate() + .filter_map(|(i, pc)| { + let qry_col = QryCol(qry_id, ColIndex::from(i as u32)); + self.pred_map.qry_col_map.get(&qry_col) + .map(|gid| (*gid, pc.expr.clone())) + }) + .collect() 
+ }; + let mut new_inner = InnerSet::default(); + // translate inner set + for eq_set in &inner.eq_sets { + let mut new_gid_set = HashSet::new(); + let mut new_expr_set = HashSet::new(); + for gid in eq_set { + match &in_map[gid] { + ExprKind::Col(Col{gid, ..}) => { + new_gid_set.insert(*gid); + } + e => { + new_expr_set.insert(e.clone()); + } + } + } + match (new_gid_set.len(), new_expr_set.len()) { + (0, 0) => unreachable!(), + // e.g. SELECT c1, c2 FROM (SELECT c1+1 as c1, c2+2 as c2 FROM t1) t WHERE c1 = c2 + (0, _) => { + let mut expr_iter = new_expr_set.into_iter(); + let e1 = expr_iter.next().unwrap(); + while let Some(e2) = expr_iter.next() { + let e = ExprKind::pred_func(PredFuncKind::Equal, vec![e1.clone(), e2]); + new_inner.handle_filt(e.into()); + } + } + // e.g. SELECT c1, c2 FROM (SELECT c1, c2 FROM t1) t WHERE c1 = c2 + (_, 0) => { + let gid = new_gid_set.into_iter().next().unwrap(); + let col = self.pred_map.col_map[&gid].clone(); + for expr in new_expr_set { + let e = ExprKind::pred_func(PredFuncKind::Equal, vec![ExprKind::Col(col.clone()), expr]); + new_inner.handle_filt(e.into()); + } + } + // e.g. SELECT c1, c2 FROM (SELECT c1, 1 as c2 FROM t1) t WHERE c1 = c2 + // e.g. SELECT c1, c2, c3 FROM (SELECT c1, c2, 1 as c3 FROM t1) t WHERE c1 = c2 AND c2 = c3 + (_, _) => { + let gid = new_gid_set.iter().next().cloned().unwrap(); + let col = self.pred_map.col_map[&gid].clone(); + for expr in new_expr_set { + let e = ExprKind::pred_func(PredFuncKind::Equal, vec![ExprKind::Col(col.clone()), expr]); + new_inner.handle_filt(e.into()); + } + if new_gid_set.len() > 1 { + new_inner.handle_eq_set(new_gid_set); + } + } + } + } + // translate filter expression + for p in &inner.filt { + let new_p = p.replace_cols_with_exprs(&in_map).unwrap(); + new_inner.handle_filt(new_p); + } + // translate functional dependencies + for (gid, deps) in &inner.fnl_deps { + for dep in deps { + let e = &in_map[gid]; + if e == dep { + // e.g. SELECT 1 as c1 FROM t1. 
+ // functional dependency is c1 -> 1. + // If we translate functional dependency back, we got + // predicate 1 = 1. + // So we check if this can be skipped. + continue + } + if let ExprKind::Col(c) = e { + // still an column, just keep the dependency. + // e.g. + let new_gid = c.gid; + new_inner.handle_dep(new_gid, dep.clone()); + } else { + let mut ri = RewriteExprIn(&in_map); + let mut new_dep = dep.clone(); + assert!(new_dep.walk_mut(&mut ri).is_continue()); + let new_e = ExprKind::pred_func(PredFuncKind::Equal, vec![e.clone(), new_dep.clone()]); + new_inner.handle_filt(new_e.into()); + } + } + } + new_inner + } + + #[inline] + fn push_query(&mut self, qry_id: QueryID, inner: InnerSet) -> Result<()> { + if !inner.is_empty() { + let inner = self.translate_query_inner_set(qry_id, &inner); + // after translating inner set of subquery, we try to push it. + // if we cannot push, create a new filter to hold all the predicates. + // e.g. SELECT * FORM (SELECT c1 FROM t1 LIMIT 1) WHERE c1 > 0 + // PPD will first remove the outer filter and try to push to subquery. + // but root operator of subquery is limit, which can not be pushed. + // so the predicate will fall back to outer query, and then we create + // a new filter to hold it. + return pushdown_pred(self.qry_set, qry_id, self.pred_map, self.op_id, self.col_id, inner) + } + pushdown_pred(self.qry_set, qry_id, self.pred_map, self.op_id, self.col_id, inner) + } + + #[inline] + fn resolve_tbl_dep(&mut self, dep: FnlDep, exported_cols: &HashSet, eq_sets: &[HashSet]) -> Option<(ExprKind, Col)> { + let mut key_cols = vec![]; + for key in &dep.keys[..] { + if let ExprKind::Col(Col{idx, kind: ColKind::Table(table_id, ..), ..}) = key { + let key_tbl_col = TblCol(*table_id, *idx); + key_cols.push(key_tbl_col); + } else { + // key is not column, fail. 
+ return None + } + } + // check if key match original dependency + let orig_keys: Vec<&Vec> = self.pred_map.tbl_dep_map[&dep.tbl_col].iter() + .filter(|key| key.len() == key_cols.len()) + .collect(); + if orig_keys.is_empty() { // key number mismatch + return None + } + for orig_key in orig_keys { + if orig_key.len() != key_cols.len() { + continue + } + let mut perm_keys = permute_keys(orig_key, &eq_sets); + let mut test_key = Vec::with_capacity(orig_key.len()); + while perm_keys.next(&mut test_key) { + if test_key == key_cols { + // dependency can convert to predicate + let gids = &self.pred_map.tbl_col_map[&dep.tbl_col]; // at least one + if let Some(gid) = gids.iter().find(|gid| exported_cols.contains(gid)) { + let c = self.pred_map.col_map[gid].clone(); + return Some((ExprKind::FnlDep(dep), c)) + } else { + // generate table column with new gid + let mut c = self.pred_map.col_map[gids.iter().next().unwrap()].clone(); + c.gid = self.col_id.inc_fetch(); + return Some((ExprKind::FnlDep(dep), c)) + } + } + } + } + None + } + + #[inline] + fn apply_scan(&mut self, table_id: TableID, cols: &[ProjCol], filt: &mut Vec, inner: InnerSet) -> Result<()> { + assert!(filt.is_empty()); + // convert equal set to filter expression + for eq_set in &inner.eq_sets { + let mut eq_set_iter = eq_set.iter(); + let gid1 = eq_set_iter.next().cloned().unwrap(); + let c1 = self.pred_map.col_map[&gid1].clone(); + while let Some(gid2) = eq_set_iter.next() { + let c2 = self.pred_map.col_map[&gid2].clone(); + let e = ExprKind::pred_func(PredFuncKind::Equal, vec![ExprKind::Col(c1.clone()), ExprKind::Col(c2)]); + filt.push(e); + } + } + let exported_cols: HashSet<_> = cols.iter().map(|pc| pc.expr.col_gid().unwrap()).collect(); + let eq_sets = inner.eq_sets.iter().map(|eq_set| { + eq_set.iter().map(|gid| { + let col = &self.pred_map.col_map[gid]; // must be table column + TblCol(table_id, col.idx) + }).collect::>() + }).collect::>(); + // apply filter expression, we need to check if any 
functional dependency exists. + 'FILT_LOOP: + for p in inner.filt { + let fnl_deps = p.find_fnl_dep(); + if fnl_deps.is_empty() { + let e = p.into_expr(&self.pred_map.col_map).unwrap(); // won't fail + filt.push(e) + } else { + // try to resolve dependency to column + let mut expr_map = HashMap::new(); + for dep in fnl_deps { + if let Some((dep, col)) = self.resolve_tbl_dep(dep, &exported_cols, &eq_sets) { + expr_map.insert(dep, col); + } else { + // drop the predicate if dependency is not resolvable. + continue 'FILT_LOOP + } + } + let e = p.replace_exprs_with_cols(&expr_map, &self.pred_map.col_map) + .and_then(|p| p.into_expr(&self.pred_map.col_map)).unwrap(); + filt.push(e); + } + } + Ok(()) + } + + #[inline] + fn extract_inner(&mut self, op: &mut Op) -> InnerSet { + let mut inner = self.pred_map.remove_inner(op.id); + match &mut op.kind { + OpKind::Filt {pred, ..} => { + for e in mem::take(pred) { + inner.handle_filt(e.into()); + } + } + OpKind::Aggr(aggr) => { + for e in mem::take(&mut aggr.filt) { + inner.handle_filt(e.into()); + } + } + OpKind::Join(join) => match &mut **join { + Join::Qualified(QualifiedJoin{filt, ..}) => { + for e in mem::take(filt) { + inner.handle_filt(e.into()); + } + } + _ => (), + } + OpKind::Scan(scan) => { + for e in mem::take(&mut scan.filt) { + inner.handle_filt(e.into()); + } + } + _ => (), + } + inner + } + + // try to push inner set to child node. + #[inline] + fn try_push(&mut self, op: &mut Op, inner: InnerSet) -> Option { + if inner.is_empty() { + return None + } + match &op.kind { + OpKind::Limit{..} | OpKind::Sort{limit: Some(_), ..} => { + // predicate can not be pushed down across limit operator. 
+ Some(inner) + } + _ => { + self.pred_map.merge_inner(op.id, inner); + None + } + } + } +} + +#[inline] +fn permute_keys<'a, T: Clone + Hash + PartialEq + Eq>(key: &'a [T], eq_sets: &'a [HashSet]) -> Permutation<'a, T> { + let mut key_sets = vec![]; + for k in key { + if let Some(eq_set) = eq_sets.iter().find(|s| s.contains(k)) { + let key_set: Vec<&T> = eq_set.iter().collect(); + key_sets.push(key_set); + } else { + key_sets.push(vec![k]); + } + } + Permutation::new(key_sets) +} + +struct Permutation<'a, T> { + key_sets: Vec>, + div: Vec, + start_idx: usize, + end_idx: usize, +} + +impl<'a, T: Clone> Permutation<'a, T> { + #[inline] + fn new(key_sets: Vec>) -> Self { + let mut end_idx = 1; + for key_set in &key_sets { + end_idx *= key_set.len(); + } + let mut div = vec![0usize; key_sets.len() - 1]; + let mut n = 1; + let mut i = key_sets.len() - 1; + while i > 0 { + n *= key_sets[i].len(); + i -= 1; + div[i] = n; + } + Permutation{key_sets, div, start_idx: 0, end_idx} + } + + #[inline] + fn next(&mut self, buf: &mut Vec) -> bool { + if self.start_idx == self.end_idx { + return false + } + buf.clear(); + let mut idx = self.start_idx; + for (key_set, div) in self.key_sets.iter().zip(self.div.iter()) { + let k_idx = idx / div; + buf.push(key_set[k_idx].clone()); + idx -= k_idx * div; + } + buf.push(self.key_sets[self.key_sets.len()-1][idx].clone()); + self.start_idx += 1; + true + } +} + +struct CollectCols<'a>{ + qry_set: &'a mut QuerySet, + pred_map: &'a mut PredMap, +} + +impl OpVisitor for CollectCols<'_> { + type Cont = (); + type Break = Error; + + #[inline] + fn enter(&mut self, op: &Op) -> ControlFlow { + match &op.kind { + OpKind::Query(qid) => { + collect_cols(self.qry_set, *qid, self.pred_map).branch()?; + } + _ => { + // collect column mapping + for e in op.kind.exprs() { + let mut cm = CollectColMapping { + qry_col_map: &mut self.pred_map.qry_col_map, + tbl_col_map: &mut self.pred_map.tbl_col_map, + col_map: &mut self.pred_map.col_map, + }; + let _ = 
e.walk(&mut cm); + } + } + } + ControlFlow::Continue(()) + } +} + +#[inline] +fn eq_set_all_included_in_groups(eq_set: &HashSet, groups: &[ExprKind]) -> bool { + eq_set.iter().all(|g| { + groups.iter().any(|e| if let ExprKind::Col(Col{gid,..}) = e { + gid == g + } else { false }) + }) +} + + +#[cfg(test)] +mod tests { + use super::*; + use crate::lgc::tests::{ + assert_j_plan1, extract_join_kinds, get_subq_by_location, get_table_filt_expr, j_catalog, + print_plan, + }; + use crate::lgc::{LgcPlan, Location}; + + #[test] + fn test_permutation() { + let eq_sets: Vec> = vec![(1..4).collect(), (4..5).collect(), (5..7).collect()]; + let key = vec![1, 4, 5]; + let mut perm = permute_keys(&key, &eq_sets); + let mut res = vec![]; + let mut buf = vec![]; + while perm.next(&mut buf) { + res.push(buf.clone()); + } + assert!(res.len() == 6); + } + + #[test] + fn test_pred_pushdown_single_table() { + let cat = j_catalog(); + assert_j_plan1( + &cat, + "select 1 from t1 where c1 = 0", + assert_filt_on_disk_table1, + ); + assert_j_plan1( + &cat, + "select 1 from t1 where c0 + c1 = c1 + c0", + assert_filt_on_disk_table1, + ); + assert_j_plan1( + &cat, + "select c1 from t1 having c1 = 0", + assert_filt_on_disk_table1, + ); + assert_j_plan1( + &cat, + "select 1 from (select c1 from t1) x1 where x1.c1 = 0", + assert_filt_on_disk_table1, + ); + assert_j_plan1( + &cat, + "select 1 from (select c1 from t1 where c1 > 0) x1", + assert_filt_on_disk_table1, + ); + assert_j_plan1( + &cat, + "select c1, count(*) from t1 group by c1 having c1 > 0", + assert_filt_on_disk_table1, + ); + assert_j_plan1( + &cat, + "select 1 from (select c1 from t1 order by c0) x1 where c1 > 0", + assert_filt_on_disk_table1, + ); + assert_j_plan1( + &cat, + "select 1 from (select c1 from t1 order by c0 limit 10) x1 where c1 > 0", + assert_no_filt_on_disk_table, + ); + + // todo: as we identify the predicate is always true or false, + // we can eliminate it or the whole operator subtree. 
+ // + // assert_j_plan1( + // &cat, + // "select 1 from (select 1 as c1 from t1) x1 where c1 > 0", + // assert_no_filt_on_disk_table, + // ); + // assert_j_plan1( + // &cat, + // "select 1 from (select 1 as c1 from t1) x1 where c1 = 0", + // |s1, mut q1| { + // pred_pushdown(&mut q1.qry_set, q1.root, &mut q1.max_cid).unwrap(); + // print_plan(s1, &q1); + // let subq = q1.root_query().unwrap(); + // if let OpKind::Proj { input, .. } = &subq.root.kind { + // assert!(input.is_empty()); + // } + // }, + // ); + // assert_j_plan1( + // &cat, + // "select 1 from (select null as c1 from t1) x1 where c1 = 0", + // |s1, mut q1| { + // pred_pushdown(&mut q1.qry_set, q1.root, &mut q1.max_cid).unwrap(); + // print_plan(s1, &q1); + // let subq = q1.root_query().unwrap(); + // if let OpKind::Proj { input, .. } = &subq.root.kind { + // assert!(input.is_empty()); + // } + // }, + // ) + } + + // #[test] + // fn test_pred_pushdown_cross_join() { + // let cat = j_catalog(); + // assert_j_plan1( + // &cat, + // "select 1 from t1, t2 where t1.c1 = 0", + // assert_filt_on_disk_table1, + // ); + // assert_j_plan1( + // &cat, + // "select t1.c1 from t1, t2 having t1.c1 = 0", + // assert_filt_on_disk_table1, + // ); + // assert_j_plan1( + // &cat, + // "select 1 from (select t1.c1 from t1, t2) x1 where x1.c1 = 0", + // assert_filt_on_disk_table1, + // ); + // assert_j_plan1( + // &cat, + // "select t1.c1, count(*) from t1, t2 group by t1.c1 having c1 > 0", + // assert_filt_on_disk_table1, + // ); + // assert_j_plan1( + // &cat, + // "select 1 from t1, t2 where t1.c1 = t2.c1", + // |s1, mut q1| { + // pred_pushdown(&mut q1.qry_set, q1.root, &mut q1.max_cid).unwrap(); + // print_plan(s1, &q1); + // let subq = q1.root_query().unwrap(); + // let jks = extract_join_kinds(&subq.root); + // assert_eq!(vec!["inner"], jks); + // }, + // ); + // assert_j_plan1( + // &cat, + // "select 1 from t1, t2, t3 where t1.c1 = t2.c1 and t1.c1 = t3.c1", + // |s1, mut q1| { + // pred_pushdown(&mut q1.qry_set, 
q1.root, &mut q1.max_cid).unwrap(); + // print_plan(s1, &q1); + // let subq = q1.root_query().unwrap(); + // let jks = extract_join_kinds(&subq.root); + // assert_eq!(vec!["inner", "inner"], jks); + // }, + // ); + // assert_j_plan1( + // &cat, + // "select 1 from t1, t2, t3 where t1.c1 = t2.c1 and t1.c1 = t3.c1 and t2.c1 = t3.c1", + // |s1, mut q1| { + // pred_pushdown(&mut q1.qry_set, q1.root, &mut q1.max_cid).unwrap(); + // print_plan(s1, &q1); + // let subq = q1.root_query().unwrap(); + // let jks = extract_join_kinds(&subq.root); + // assert_eq!(vec!["inner", "inner"], jks); + // }, + // ) + // } + + // #[test] + // fn test_pred_pushdown_inner_join() { + // let cat = j_catalog(); + // assert_j_plan1( + // &cat, + // "select 1 from t1 join t2 where t1.c1 = 0", + // assert_filt_on_disk_table1, + // ); + // assert_j_plan1( + // &cat, + // "select 1 from t1 join t2 where c2 = 0", + // assert_filt_on_disk_table1r, + // ); + // assert_j_plan1( + // &cat, + // "select t1.c1 from t1 join t2 having t1.c1 = 0", + // assert_filt_on_disk_table1, + // ); + // assert_j_plan1( + // &cat, + // "select 1 from (select t1.c1 from t1 join t2) x1 where x1.c1 = 0", + // assert_filt_on_disk_table1, + // ); + // assert_j_plan1( + // &cat, + // "select t1.c1, c2, count(*) from t1 join t2 where t1.c1 = 0 group by t1.c1, t2.c2 having c2 > 100", + // assert_filt_on_disk_table2, + // ); + // } + + // #[test] + // fn test_pred_pushdown_left_join() { + // let cat = j_catalog(); + // assert_j_plan1( + // &cat, + // "select 1 from t1 left join t2 where t1.c1 = 0", + // assert_filt_on_disk_table1, + // ); + // assert_j_plan1( + // &cat, + // "select 1 from t1 left join t2 where c2 = 0", + // assert_filt_on_disk_table1r, + // ); + // // filter expression NOT rejects null, so cannot be pushed + // // to table scan. 
+ // assert_j_plan1( + // &cat, + // "select 1 from t1 left join t2 where c2 is null", + // assert_no_filt_on_disk_table, + // ); + // // involve both sides, cannot be pushed to table scan, + // // join type will be converted to inner join. + // assert_j_plan1( + // &cat, + // "select 1 from t1 left join t2 where t1.c1 = c2", + // assert_no_filt_on_disk_table, + // ); + // assert_j_plan1( + // &cat, + // "select t1.c1 from t1 left join t2 having t1.c1 = 0 order by c1", + // assert_filt_on_disk_table1, + // ); + // assert_j_plan1( + // &cat, + // "select 1 from (select t1.c1 from t1 left join t2) x1 where x1.c1 = 0", + // assert_filt_on_disk_table1, + // ); + // assert_j_plan1( + // &cat, + // "select t1.c1, c2, count(*) from t1 left join t2 where t1.c1 = 0 group by t1.c1, t2.c2 having c2 > 100", + // assert_filt_on_disk_table2, + // ); + // // one left join converted to inner join + // assert_j_plan1( + // &cat, + // "select 1 from t1 left join t2 left join t3 on t1.c1 = t3.c3 where t1.c1 = t2.c2", + // |s1, mut q1| { + // pred_pushdown(&mut q1.qry_set, q1.root, &mut q1.max_cid).unwrap(); + // print_plan(s1, &q1); + // let subq = q1.root_query().unwrap(); + // let jks = extract_join_kinds(&subq.root); + // // the predicate pushdown only change the topmost join type. + // assert_eq!(vec!["left", "inner"], jks); + // }, + // ); + // // both left joins converted to inner joins, and one more inner join added. + // assert_j_plan1( + // &cat, + // "select 1 from t1 left join t2 left join t3 on t1.c1 = t2.c2 and t1.c1 = t3.c3 where t2.c2 = t3.c3", + // |s1, mut q1| { + // pred_pushdown(&mut q1.qry_set, q1.root, &mut q1.max_cid).unwrap(); + // print_plan(s1, &q1); + // let subq = q1.root_query().unwrap(); + // let jks = extract_join_kinds(&subq.root); + // // in this case, the predicate pushdown stops at the first join + // // so second join will not be converted to inner join + // // but will be fixed by predicate propagation rule. 
+ // assert_eq!(vec!["inner", "left"], jks); + // } + // ); + // // both left joins converted to inner joins, and remove as no join condition, + // // one more inner join added. + // assert_j_plan1( + // &cat, + // "select 1 from t1 left join t2 left join t3 where t2.c2 = t3.c3", + // |s1, mut q1| { + // pred_pushdown(&mut q1.qry_set, q1.root, &mut q1.max_cid).unwrap(); + // print_plan(s1, &q1); + // let subq = q1.root_query().unwrap(); + // let jks = extract_join_kinds(&subq.root); + // // stops at first join, second will not be converted to inner join + // assert_eq!(vec!["inner", "left"], jks); + // }, + // ); + // // one is pushed as join condition, one is pushed as filter + // assert_j_plan1( + // &cat, + // "select 1 from t1 join t2 left join t3 left join t4 where t1.c1 = t2.c2 and t3.c3 is null", + // |s1, mut q1| { + // pred_pushdown(&mut q1.qry_set, q1.root, &mut q1.max_cid).unwrap(); + // print_plan(s1, &q1); + // let subq = q1.root_query().unwrap(); + // let jks = extract_join_kinds(&subq.root); + // assert_eq!(vec!["left", "left", "inner"], jks); + // } + // ); + // } + + // #[test] + // fn test_pred_pushdown_right_join() { + // let cat = j_catalog(); + // // right join is replaced by left join, so right table 2 is t1. 
+ // assert_j_plan1( + // &cat, + // "select 1 from t1 right join t2 where t1.c1 = 0", + // assert_filt_on_disk_table1r, + // ); + // assert_j_plan1( + // &cat, + // "select 1 from t1 right join t2 where t2.c2 = 0", + // assert_filt_on_disk_table1, + // ); + // } + + // #[test] + // fn test_pred_pushdown_full_join() { + // let cat = j_catalog(); + // // full join converted to left join + // assert_j_plan1( + // &cat, + // "select 1 from t1 full join t2 where t1.c1 = 0", + // |s1, mut q1| { + // pred_pushdown(&mut q1.qry_set, q1.root, &mut q1.max_cid).unwrap(); + // print_plan(s1, &q1); + // let subq1 = get_subq_by_location(&q1, Location::Disk); + // // converted to right table, then left table + // // the underlying query postion is changed. + // assert!(!get_table_filt_expr(&subq1[1]).is_empty()); + // }, + // ); + // // full join converted to right join, then left join + // assert_j_plan1( + // &cat, + // "select 1 from t1 full join t2 where t2.c2 = 0", + // assert_filt_on_disk_table1r, + // ); + // // full join converted to inner join + // assert_j_plan1( + // &cat, + // "select 1 from t1 full join t2 where t1.c1 = t2.c2", + // |s1, mut q1| { + // pred_pushdown(&mut q1.qry_set, q1.root, &mut q1.max_cid).unwrap(); + // print_plan(s1, &q1); + // let subq = q1.root_query().unwrap(); + // let jks = extract_join_kinds(&subq.root); + // assert_eq!(vec!["inner"], jks); + // }, + // ); + // assert_j_plan1( + // &cat, + // "select 1 from t1 full join t2 on t1.c1 = t2.c2 where t1.c1 is null and t2.c2 is null", + // |s1, mut q1| { + // pred_pushdown(&mut q1.qry_set, q1.root, &mut q1.max_cid).unwrap(); + // print_plan(s1, &q1); + // let subq = q1.root_query().unwrap(); + // let jks = extract_join_kinds(&subq.root); + // assert_eq!(vec!["full"], jks); + // }, + // ); + // // convert to left join and add one filt + // assert_j_plan1( + // &cat, + // "select 1 from t1 full join t2 on t1.c1 = t2.c2 where t1.c0 > 0 and t2.c2 is null", + // |s1, mut q1| { + // pred_pushdown(&mut 
q1.qry_set, q1.root, &mut q1.max_cid).unwrap(); + // print_plan(s1, &q1); + // let subq = q1.root_query().unwrap(); + // let jks = extract_join_kinds(&subq.root); + // assert_eq!(vec!["left"], jks); + // }, + // ); + // } + + fn assert_filt_on_disk_table1(s1: &str, mut q1: LgcPlan) { + print_plan(s1, &q1); + pred_pushdown(&mut q1.qry_set, q1.root, &mut q1.max_cid).unwrap(); + print_plan(s1, &q1); + let subq1 = get_subq_by_location(&q1, Location::Disk); + assert!(!get_table_filt_expr(&subq1[0]).is_empty()); + } + + fn assert_filt_on_disk_table1r(s1: &str, mut q1: LgcPlan) { + pred_pushdown(&mut q1.qry_set, q1.root, &mut q1.max_cid).unwrap(); + print_plan(s1, &q1); + let subq1 = get_subq_by_location(&q1, Location::Disk); + assert!(!get_table_filt_expr(&subq1[1]).is_empty()); + } + + fn assert_filt_on_disk_table2(s1: &str, mut q1: LgcPlan) { + pred_pushdown(&mut q1.qry_set, q1.root, &mut q1.max_cid).unwrap(); + print_plan(s1, &q1); + let subq1 = get_subq_by_location(&q1, Location::Disk); + assert!(!get_table_filt_expr(&subq1[0]).is_empty()); + assert!(!get_table_filt_expr(&subq1[1]).is_empty()); + } + + fn assert_no_filt_on_disk_table(s1: &str, mut q1: LgcPlan) { + pred_pushdown(&mut q1.qry_set, q1.root, &mut q1.max_cid).unwrap(); + print_plan(s1, &q1); + let subq1 = get_subq_by_location(&q1, Location::Disk); + assert!(subq1 + .into_iter() + .all(|subq| get_table_filt_expr(subq).is_empty())); + } +} + +#[derive(Default, Clone)] +struct ExprAttr { + qry_ids: QryIDs, + has_aggf: bool, + // whether the predicate contains subquery that cannot be pushed down. + has_subq: bool, +} + +impl<'a> ExprVisitor<'a> for ExprAttr { + type Cont = (); + type Break = (); + #[inline] + fn enter(&mut self, e: &ExprKind) -> ControlFlow<()> { + match e { + ExprKind::Aggf { .. } => self.has_aggf = true, + ExprKind::Col(Col { + kind: ColKind::Query(qry_id), + .. 
+ }) => match &mut self.qry_ids { + QryIDs::Empty => { + self.qry_ids = QryIDs::Single(*qry_id); + } + QryIDs::Single(qid) => { + if qid != qry_id { + let mut hs = HashSet::new(); + hs.insert(*qid); + hs.insert(*qry_id); + self.qry_ids = QryIDs::Multi(hs); + } + } + QryIDs::Multi(hs) => { + hs.insert(*qry_id); + } + }, + ExprKind::Subq(..) | ExprKind::Attval(_) => { + self.has_subq = true; + } + _ => (), + } + ControlFlow::Continue(()) + } +} + +/* below is deprecated */ + +// #[derive(Clone)] +// struct CachedExpr { +// e: ExprKind, +// // lazy field +// attr: Option, +// reject_nulls: Option>, +// } + +// impl CachedExpr { +// #[inline] +// fn new(e: ExprKind) -> Self { +// CachedExpr { +// e, +// attr: None, +// reject_nulls: None, +// } +// } + +// #[inline] +// fn load_attr(&self) -> &ExprAttr { +// if self.attr.is_none() { +// let mut attr = ExprAttr::default(); +// let _ = self.e.walk(&mut attr); +// self.attr = Some(attr); +// } +// self.attr.as_ref().unwrap() // won't fail +// } + +// #[inline] +// fn load_reject_null(&mut self, qry_id: QueryID) -> Result { +// if let Some(reject_nulls) = &mut self.reject_nulls { +// let res = match reject_nulls.entry(qry_id) { +// Entry::Occupied(occ) => *occ.get(), +// Entry::Vacant(vac) => { +// let rn = self.e.clone().reject_null(|e| match e { +// ExprKind::Col(Col { +// kind: ColKind::Query(qid), +// .. +// }) if *qid == qry_id => { +// *e = ExprKind::const_null(); +// } +// _ => (), +// })?; +// vac.insert(rn); +// rn +// } +// }; +// Ok(res) +// } else { +// let mut reject_nulls = HashMap::new(); +// let rn = self.e.clone().reject_null(|e| match e { +// ExprKind::Col(Col { +// kind: ColKind::Query(qid), +// .. 
+// }) if *qid == qry_id => { +// *e = ExprKind::const_null(); +// } +// _ => (), +// })?; +// reject_nulls.insert(qry_id, rn); +// self.reject_nulls = Some(reject_nulls); +// Ok(rn) +// } +// } + +// #[inline] +// fn rewrite(&mut self, qry_id: QueryID, out: &[ProjCol]) { +// let mut roe = RewriteOutExpr { qry_id, out }; +// let _ = self.e.walk_mut(&mut roe); +// // must reset lazy field as the expression changed +// self.attr = None; +// self.reject_nulls = None; +// } +// } + +// #[inline] +// fn push_single( +// qry_set: &mut QuerySet, +// op: &mut Op, +// mut p: ExprItem, +// ) -> Result<(RuleEffect, Option)> { +// let mut eff = RuleEffect::empty(); +// let res = match &mut op.kind { +// OpKind::Query(qry_id) => { +// if let Some(subq) = qry_set.get(qry_id) { +// p.rewrite(*qry_id, subq.out_cols()); +// // after rewriting, Simplify it before pushing +// simplify_nested(&mut p.e, NullCoalesce::Null)?; +// match &p.e { +// ExprKind::Const(Const::Null) => { +// *op = Op::empty(); +// eff |= RuleEffect::OP; +// return Ok((eff, None)); +// } +// ExprKind::Const(c) => { +// if c.is_zero().unwrap_or_default() { +// *op = Op::empty(); +// eff |= RuleEffect::OP; +// return Ok((eff, None)); +// } else { +// return Ok((eff, None)); +// } +// } +// _ => (), +// } +// let e = qry_set.transform_op(*qry_id, |qry_set, _, op| { +// let (e, pred) = push_single(qry_set, op, p)?; +// assert!(pred.is_none()); // this push must succeed +// eff |= e; +// eff |= RuleEffect::EXPR; +// Ok::<_, Error>(eff) +// })??; +// eff |= e; +// None +// } else { +// Some(p) +// } +// } +// // Table always accept +// OpKind::Scan(scan) => { +// scan.filt.push(p.e); +// None +// } +// // Empty just ignores +// OpKind::Empty => None, +// OpKind::Row(_) => todo!(), // todo: evaluate immediately +// // Proj/Sort/Limit/Attach will try pushing pred, and if fails just accept. +// // todo: pushdown to limit should be forbidden +// OpKind::Proj { .. } | OpKind::Sort { .. } | OpKind::Limit { .. 
} | OpKind::Attach(..) => { +// let (e, item) = push_or_accept(qry_set, op, p)?; +// eff |= e; +// item +// } +// OpKind::Aggr(aggr) => { +// if p.load_attr().has_aggf { +// Some(p) +// } else { +// // after the validation, all expressions containing no aggregate +// // functions can be pushed down through Aggr operator, as they can +// // only be composite of group columns, constants and functions. +// let (e, item) = push_single(qry_set, &mut aggr.input, p)?; +// assert!(item.is_none()); // just succeed +// eff |= e; +// None +// } +// } +// OpKind::Filt { pred, input } => match push_single(qry_set, input, p)? { +// (e, Some(p)) => { +// eff |= e; +// let mut old = mem::take(pred); +// old.push(p.e); +// let mut new = ExprKind::pred_conj(old); +// eff |= simplify_nested(&mut new, NullCoalesce::False)?; +// *pred = new.into_conj(); +// None +// } +// (e, None) => { +// eff |= e; +// eff |= RuleEffect::EXPR; +// None +// } +// }, +// OpKind::Setop(so) => { +// // push to both side and won't fail +// let (e, item) = push_single(qry_set, so.left.as_mut(), p.clone())?; +// assert!(item.is_none()); +// eff |= e; +// eff |= RuleEffect::EXPR; +// let (e, item) = push_single(qry_set, so.right.as_mut(), p)?; +// eff |= e; +// eff |= RuleEffect::EXPR; +// assert!(item.is_none()); +// None +// } +// OpKind::JoinGraph(_) => unreachable!("Predicates pushdown to join graph is not supported"), +// OpKind::Join(join) => match join.as_mut() { +// Join::Cross(jos) => { +// if p.load_attr().has_aggf { +// // do not push predicates with aggregate functions +// Some(p) +// } else { +// let qry_ids = &p.load_attr().qry_ids; +// match qry_ids { +// QryIDs::Empty => unreachable!(), // Currently marked as unreachable +// QryIDs::Single(qry_id) => { +// // predicate of single table +// let mut jo_qids = HashSet::new(); // reused container +// for jo in jos { +// jo_qids.clear(); +// jo.collect_qry_ids(&mut jo_qids); +// if jo_qids.contains(qry_id) { +// let (e, item) = push_single(qry_set, 
jo.as_mut(), p)?; +// assert!(item.is_none()); +// eff |= e; +// return Ok((eff, None)); +// } +// } +// Some(p) +// } +// QryIDs::Multi(qry_ids) => { +// // if involved multiple tables, we convert cross join into join tree +// // currently only two-way join is supported. +// // once cross join are converted as a join tree, these rejected predicates +// // can be pushed further. +// if qry_ids.len() > 2 { +// Some(p) +// } else { +// let (qid1, qid2) = { +// let mut iter = qry_ids.iter(); +// let q1 = iter.next().cloned().unwrap(); +// let q2 = iter.next().cloned().unwrap(); +// (q1, q2) +// }; +// let mut join_ops = mem::take(jos); +// if let Some((idx1, jo)) = join_ops +// .iter() +// .enumerate() +// .find(|(_, jo)| jo.contains_qry_id(qid1)) +// { +// if jo.contains_qry_id(qid2) { +// // belong to single join op, push to it +// *jos = join_ops; +// let (e, item) = +// push_single(qry_set, jos[idx1].as_mut(), p.clone())?; +// assert!(item.is_none()); +// eff |= e; +// eff |= RuleEffect::EXPR; +// return Ok((eff, None)); +// } +// if let Some(idx2) = +// join_ops.iter().position(|jo| jo.contains_qry_id(qid2)) +// { +// let (jo1, jo2) = if idx1 < idx2 { +// // get larger one first +// let jo2 = join_ops.swap_remove(idx2); +// let jo1 = join_ops.swap_remove(idx1); +// (jo1, jo2) +// } else { +// let jo1 = join_ops.swap_remove(idx1); +// let jo2 = join_ops.swap_remove(idx2); +// (jo2, jo1) +// }; +// if join_ops.is_empty() { +// // entire cross join is converted to inner join tree. 
+// let new_join = Join::Qualified(QualifiedJoin { +// kind: JoinKind::Inner, +// left: jo1, +// right: jo2, +// cond: vec![p.e], +// filt: vec![], +// }); +// *join.as_mut() = new_join; +// eff |= RuleEffect::OPEXPR; +// return Ok((eff, None)); +// } else { +// let new_join = JoinOp::qualified( +// JoinKind::Inner, +// jo1, +// jo2, +// vec![p.e], +// vec![], +// ); +// join_ops.push(new_join); +// *jos = join_ops; +// eff |= RuleEffect::OPEXPR; +// return Ok((eff, None)); +// } +// } else { +// return Err(Error::InvalidJoinCondition); +// } +// } else { +// return Err(Error::InvalidJoinCondition); +// } +// } +// } +// } +// } +// } +// Join::Qualified(QualifiedJoin { +// kind, +// left, +// right, +// cond, +// filt, +// }) => { +// let qry_ids = &p.load_attr().qry_ids; +// match qry_ids { +// QryIDs::Empty => unreachable!(), // Currently marked as unreachable +// QryIDs::Single(qry_id) => { +// let qry_id = *qry_id; +// if left.contains_qry_id(qry_id) { +// match kind { +// JoinKind::Inner | JoinKind::Left => { +// let (e, item) = push_single(qry_set, left.as_mut(), p)?; +// assert!(item.is_none()); +// eff |= e; +// eff |= RuleEffect::EXPR; +// return Ok((eff, None)); +// } +// JoinKind::Full => { +// if p.load_reject_null(qry_id)? 
{ +// // reject null +// // convert full join to right join, then to left join +// *kind = JoinKind::Left; +// mem::swap(left, right); +// // push to (original) left side +// let (e, item) = push_single(qry_set, right.as_mut(), p)?; +// assert!(item.is_none()); +// eff |= e; +// eff |= RuleEffect::OPEXPR; +// return Ok((eff, None)); +// } else { +// // not reject null +// filt.push(p.e); +// eff |= RuleEffect::EXPR; +// return Ok((eff, None)); +// } +// } +// _ => todo!(), +// } +// } else if right.contains_qry_id(qry_id) { +// match kind { +// JoinKind::Inner => { +// let (e, item) = push_single(qry_set, right.as_mut(), p)?; +// assert!(item.is_none()); +// eff |= e; +// return Ok((eff, None)); +// } +// JoinKind::Left => { +// if p.load_reject_null(qry_id)? { +// // reject null +// // convert left join to inner join +// *kind = JoinKind::Inner; +// if !filt.is_empty() { +// cond.extend(mem::take(filt)) // put filters into condition +// } +// let (e, item) = push_single(qry_set, right.as_mut(), p)?; +// assert!(item.is_none()); +// eff |= e; +// eff |= RuleEffect::OPEXPR; +// return Ok((eff, None)); +// } else { +// // not reject null +// filt.push(p.e); +// eff |= RuleEffect::EXPR; +// return Ok((eff, None)); +// } +// } +// JoinKind::Full => { +// if p.load_reject_null(qry_id)? 
{ +// // reject null +// // convert full join to left join +// *kind = JoinKind::Left; +// let (e, item) = push_single(qry_set, right.as_mut(), p)?; +// assert!(item.is_none()); +// eff |= e; +// eff |= RuleEffect::OPEXPR; +// return Ok((eff, None)); +// } else { +// // not reject null +// filt.push(p.e); +// eff |= RuleEffect::EXPR; +// return Ok((eff, None)); +// } +// } +// _ => todo!(), +// } +// } else { +// // this should not happen, the predicate must belong to either side +// unreachable!() +// } +// } +// QryIDs::Multi(qry_ids) => { +// let mut left_qids = HashSet::new(); +// left.collect_qry_ids(&mut left_qids); +// let left_qids: HashSet = +// qry_ids.intersection(&left_qids).cloned().collect(); +// let mut right_qids = HashSet::new(); +// right.collect_qry_ids(&mut right_qids); +// let right_qids: HashSet = +// qry_ids.intersection(&right_qids).cloned().collect(); +// match (left_qids.is_empty(), right_qids.is_empty()) { +// (false, true) => { +// // handle on left side +// match kind { +// JoinKind::Inner | JoinKind::Left => { +// let (e, item) = push_single(qry_set, left.as_mut(), p)?; +// assert!(item.is_none()); +// eff |= e; +// return Ok((eff, None)); +// } +// JoinKind::Full => { +// for qry_id in left_qids { +// if p.load_reject_null(qry_id)? 
{ +// // convert full join to right join, then to left join +// *kind = JoinKind::Left; +// mem::swap(left, right); +// // push to (original) left side +// let (e, item) = +// push_single(qry_set, right.as_mut(), p)?; +// assert!(item.is_none()); +// eff |= e; +// eff |= RuleEffect::OPEXPR; +// return Ok((eff, None)); +// } +// } +// // not reject null +// filt.push(p.e); +// eff |= RuleEffect::EXPR; +// return Ok((eff, None)); +// } +// _ => todo!(), +// } +// } +// (true, false) => { +// // handle on right side +// match kind { +// JoinKind::Inner => { +// let (e, item) = push_single(qry_set, right.as_mut(), p)?; +// assert!(item.is_none()); +// eff |= e; +// return Ok((eff, None)); +// } +// JoinKind::Left => { +// for qry_id in right_qids { +// if p.load_reject_null(qry_id)? { +// // convert left join to inner join +// *kind = JoinKind::Inner; +// if !filt.is_empty() { +// // put filters into condition +// cond.extend(mem::take(filt)); +// } +// let (e, item) = +// push_single(qry_set, right.as_mut(), p)?; +// assert!(item.is_none()); +// eff |= e; +// eff |= RuleEffect::OPEXPR; +// return Ok((e, None)); +// } +// } +// // not reject null +// filt.push(p.e); +// eff |= RuleEffect::EXPR; +// return Ok((eff, None)); +// } +// JoinKind::Full => { +// for qry_id in right_qids { +// if p.load_reject_null(qry_id)? { +// // convert full join to left join +// *kind = JoinKind::Left; +// let (e, item) = +// push_single(qry_set, right.as_mut(), p)?; +// assert!(item.is_none()); +// eff |= e; +// eff |= RuleEffect::OP; +// return Ok((eff, None)); +// } +// } +// // not reject null +// filt.push(p.e); +// eff |= RuleEffect::EXPR; +// return Ok((eff, None)); +// } +// _ => todo!(), +// } +// } +// (false, false) => { +// // handle on both sides +// match kind { +// JoinKind::Inner => { +// cond.push(p.e); +// eff |= RuleEffect::EXPR; +// return Ok((eff, None)); +// } +// JoinKind::Left => { +// for qry_id in right_qids { +// if p.load_reject_null(qry_id)? 
{ +// // convert left join to inner join +// *kind = JoinKind::Inner; +// if !filt.is_empty() { +// cond.extend(mem::take(filt)) +// // put filters into condition +// } +// cond.push(p.e); +// eff |= RuleEffect::OPEXPR; +// return Ok((eff, None)); +// } +// } +// // not reject null on right side +// filt.push(p.e); +// eff |= RuleEffect::EXPR; +// return Ok((eff, None)); +// } +// JoinKind::Full => { +// let mut left_reject_null = false; +// let mut right_reject_null = false; +// for qry_id in left_qids { +// if p.load_reject_null(qry_id)? { +// left_reject_null = true; +// break; +// } +// } +// for qry_id in right_qids { +// if p.load_reject_null(qry_id)? { +// right_reject_null = true; +// break; +// } +// } +// match (left_reject_null, right_reject_null) { +// (true, true) => { +// // convert to inner join +// *kind = JoinKind::Inner; +// if !filt.is_empty() { +// cond.extend(mem::take(filt)) +// } +// cond.push(p.e); +// eff |= RuleEffect::OPEXPR; +// return Ok((eff, None)); +// } +// (true, false) => { +// // convert to right join then left join +// *kind = JoinKind::Left; +// mem::swap(left, right); +// filt.push(p.e); +// eff |= RuleEffect::OPEXPR; +// return Ok((eff, None)); +// } +// (false, true) => { +// // convert to left join +// *kind = JoinKind::Left; +// filt.push(p.e); +// eff |= RuleEffect::OPEXPR; +// return Ok((eff, None)); +// } +// (false, false) => { +// filt.push(p.e); +// eff |= RuleEffect::EXPR; +// return Ok((eff, None)); +// } +// } +// } +// _ => todo!(), +// } +// } +// (true, true) => unreachable!(), +// } +// } +// } +// } +// }, +// }; +// Ok((eff, res)) +// } + +// #[inline] +// fn push_or_accept( +// qry_set: &mut QuerySet, +// op: &mut Op, +// pred: ExprItem, +// ) -> Result<(RuleEffect, Option)> { +// let input = op.kind.input_mut().unwrap(); // won't fail +// match push_single(qry_set, input, pred)? 
{ +// (eff, Some(pred)) => { +// let child = mem::take(input); +// let new_filt = Op::new(OpKind::filt(vec![pred.e], child)); +// *input = new_filt; +// // as child reject it, we do not merge effect, as parent will update it +// Ok((eff, None)) +// } +// (eff, None) => Ok((eff, None)), +// } +// } + +// struct RewriteOutExpr<'a> { +// qry_id: QueryID, +// out: &'a [ProjCol], +// } + +// impl ExprMutVisitor for RewriteOutExpr<'_> { +// type Cont = (); +// type Break = (); +// #[inline] +// fn leave(&mut self, e: &mut ExprKind) -> ControlFlow<()> { +// if let ExprKind::Col(Col { +// kind: ColKind::Query(qry_id), +// idx, +// .. +// }) = e +// { +// if *qry_id == self.qry_id { +// let new_c = &self.out[idx.value() as usize]; +// *e = new_c.expr.clone(); +// } +// } +// ControlFlow::Continue(()) +// } +// } diff --git a/xngin-plan/src/rule/pred_pushdown.rs b/doradb-plan/src/rule/pred_pushdown_old.rs similarity index 99% rename from xngin-plan/src/rule/pred_pushdown.rs rename to doradb-plan/src/rule/pred_pushdown_old.rs index e5af260..2b3f24e 100644 --- a/xngin-plan/src/rule/pred_pushdown.rs +++ b/doradb-plan/src/rule/pred_pushdown_old.rs @@ -6,9 +6,9 @@ use crate::rule::RuleEffect; use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use std::mem; -use xngin_expr::controlflow::{Branch, ControlFlow, Unbranch}; -use xngin_expr::fold::Fold; -use xngin_expr::{Col, ColKind, Const, ExprKind, ExprMutVisitor, ExprVisitor, QueryID}; +use doradb_expr::controlflow::{Branch, ControlFlow, Unbranch}; +use doradb_expr::fold::Fold; +use doradb_expr::{Col, ColKind, Const, ExprKind, ExprExt, ExprMutVisitor, ExprVisitor, QueryID}; /// Pushdown predicates. 
#[inline] diff --git a/xngin-protocol/Cargo.toml b/doradb-protocol/Cargo.toml similarity index 66% rename from xngin-protocol/Cargo.toml rename to doradb-protocol/Cargo.toml index 8e84386..b3796fa 100644 --- a/xngin-protocol/Cargo.toml +++ b/doradb-protocol/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "xngin-protocol" +name = "doradb-protocol" version = "0.1.0" edition = "2021" authors = ["jiangzhe "] @@ -7,7 +7,7 @@ license = "MIT OR Apache-2.0" description = "Protocol module of X-Engine" keywords = ["protocol", "mysql"] categories = ["database-implementations"] -repository = "https://github.com/jiangzhe/xngin/xngin-protocol/" +repository = "https://github.com/jiangzhe/doradb/doradb-protocol/" [dependencies] thiserror = "1.0" @@ -28,12 +28,12 @@ flume = "0.10" signal-hook = "0.3" thread_local = "1.1" scopeguard = "1.1" -xngin-storage = { version = "0.1.0", path = "../xngin-storage" } +doradb-storage = { version = "0.1.0", path = "../doradb-storage" } semistr = "0.1" fxd = "0.1" -xngin-sql = { version = "0.1.0", path = "../xngin-sql" } -xngin-catalog = { version = "0.1.0", path = "../xngin-catalog" } -xngin-plan = { version = "0.1.0", path = "../xngin-plan" } +doradb-sql = { version = "0.1.0", path = "../doradb-sql" } +doradb-catalog = { version = "0.1.0", path = "../doradb-catalog" } +doradb-plan = { version = "0.1.0", path = "../doradb-plan" } [dev-dependencies] async-task = "4.2" diff --git a/xngin-protocol/README.md b/doradb-protocol/README.md similarity index 62% rename from xngin-protocol/README.md rename to doradb-protocol/README.md index 4ee619c..1a57240 100644 --- a/xngin-protocol/README.md +++ b/doradb-protocol/README.md @@ -1,6 +1,6 @@ -# Xngin Protocol +# doradb Protocol -This is implementation of mysql on-wire protocol for xngin server. +This is implementation of mysql on-wire protocol for doradb server. Start a local docker image for unit tests. 
diff --git a/xngin-protocol/src/buf.rs b/doradb-protocol/src/buf.rs similarity index 100% rename from xngin-protocol/src/buf.rs rename to doradb-protocol/src/buf.rs diff --git a/xngin-protocol/src/lib.rs b/doradb-protocol/src/lib.rs similarity index 100% rename from xngin-protocol/src/lib.rs rename to doradb-protocol/src/lib.rs diff --git a/xngin-protocol/src/mysql/auth.rs b/doradb-protocol/src/mysql/auth.rs similarity index 100% rename from xngin-protocol/src/mysql/auth.rs rename to doradb-protocol/src/mysql/auth.rs diff --git a/xngin-protocol/src/mysql/cmd/mod.rs b/doradb-protocol/src/mysql/cmd/mod.rs similarity index 100% rename from xngin-protocol/src/mysql/cmd/mod.rs rename to doradb-protocol/src/mysql/cmd/mod.rs diff --git a/xngin-protocol/src/mysql/cmd/query.rs b/doradb-protocol/src/mysql/cmd/query.rs similarity index 100% rename from xngin-protocol/src/mysql/cmd/query.rs rename to doradb-protocol/src/mysql/cmd/query.rs diff --git a/xngin-protocol/src/mysql/col.rs b/doradb-protocol/src/mysql/col.rs similarity index 100% rename from xngin-protocol/src/mysql/col.rs rename to doradb-protocol/src/mysql/col.rs diff --git a/xngin-protocol/src/mysql/conn.rs b/doradb-protocol/src/mysql/conn.rs similarity index 100% rename from xngin-protocol/src/mysql/conn.rs rename to doradb-protocol/src/mysql/conn.rs diff --git a/xngin-protocol/src/mysql/error.rs b/doradb-protocol/src/mysql/error.rs similarity index 94% rename from xngin-protocol/src/mysql/error.rs rename to doradb-protocol/src/mysql/error.rs index f303278..455b868 100644 --- a/xngin-protocol/src/mysql/error.rs +++ b/doradb-protocol/src/mysql/error.rs @@ -137,18 +137,18 @@ impl From for ErrPacket<'static> { } } -impl From for Error { +impl From for Error { #[inline] - fn from(src: xngin_sql::error::Error) -> Self { + fn from(src: doradb_sql::error::Error) -> Self { match src { - xngin_sql::error::Error::SyntaxError(_) => Error::SyntaxError(), + doradb_sql::error::Error::SyntaxError(_) => Error::SyntaxError(), } } 
} -impl From for Error { +impl From for Error { #[inline] - fn from(src: xngin_plan::error::Error) -> Self { + fn from(src: doradb_plan::error::Error) -> Self { Error::PlanError(Box::new(src.to_string())) } } diff --git a/xngin-protocol/src/mysql/flag.rs b/doradb-protocol/src/mysql/flag.rs similarity index 100% rename from xngin-protocol/src/mysql/flag.rs rename to doradb-protocol/src/mysql/flag.rs diff --git a/xngin-protocol/src/mysql/handshake.rs b/doradb-protocol/src/mysql/handshake.rs similarity index 100% rename from xngin-protocol/src/mysql/handshake.rs rename to doradb-protocol/src/mysql/handshake.rs diff --git a/xngin-protocol/src/mysql/macros.rs b/doradb-protocol/src/mysql/macros.rs similarity index 100% rename from xngin-protocol/src/mysql/macros.rs rename to doradb-protocol/src/mysql/macros.rs diff --git a/xngin-protocol/src/mysql/mod.rs b/doradb-protocol/src/mysql/mod.rs similarity index 88% rename from xngin-protocol/src/mysql/mod.rs rename to doradb-protocol/src/mysql/mod.rs index 188b733..c0dd66e 100644 --- a/xngin-protocol/src/mysql/mod.rs +++ b/doradb-protocol/src/mysql/mod.rs @@ -24,7 +24,7 @@ impl Default for ServerSpec { #[inline] fn default() -> Self { ServerSpec { - version: String::from("mysql-8.0.30-xngin"), + version: String::from("mysql-8.0.30-doradb"), protocol_version: 10, } } diff --git a/xngin-protocol/src/mysql/packet.rs b/doradb-protocol/src/mysql/packet.rs similarity index 100% rename from xngin-protocol/src/mysql/packet.rs rename to doradb-protocol/src/mysql/packet.rs diff --git a/xngin-protocol/src/mysql/principal.rs b/doradb-protocol/src/mysql/principal.rs similarity index 100% rename from xngin-protocol/src/mysql/principal.rs rename to doradb-protocol/src/mysql/principal.rs diff --git a/xngin-protocol/src/mysql/resultset.rs b/doradb-protocol/src/mysql/resultset.rs similarity index 100% rename from xngin-protocol/src/mysql/resultset.rs rename to doradb-protocol/src/mysql/resultset.rs diff --git 
a/xngin-protocol/src/mysql/serde/de.rs b/doradb-protocol/src/mysql/serde/de.rs similarity index 100% rename from xngin-protocol/src/mysql/serde/de.rs rename to doradb-protocol/src/mysql/serde/de.rs diff --git a/xngin-protocol/src/mysql/serde/mod.rs b/doradb-protocol/src/mysql/serde/mod.rs similarity index 100% rename from xngin-protocol/src/mysql/serde/mod.rs rename to doradb-protocol/src/mysql/serde/mod.rs diff --git a/xngin-protocol/src/mysql/serde/ser.rs b/doradb-protocol/src/mysql/serde/ser.rs similarity index 100% rename from xngin-protocol/src/mysql/serde/ser.rs rename to doradb-protocol/src/mysql/serde/ser.rs diff --git a/xngin-protocol/src/mysql/time.rs b/doradb-protocol/src/mysql/time.rs similarity index 100% rename from xngin-protocol/src/mysql/time.rs rename to doradb-protocol/src/mysql/time.rs diff --git a/xngin-protocol/src/mysql/value.rs b/doradb-protocol/src/mysql/value.rs similarity index 100% rename from xngin-protocol/src/mysql/value.rs rename to doradb-protocol/src/mysql/value.rs diff --git a/xngin-server/Cargo.toml b/doradb-server/Cargo.toml similarity index 55% rename from xngin-server/Cargo.toml rename to doradb-server/Cargo.toml index ce48e75..444506d 100644 --- a/xngin-server/Cargo.toml +++ b/doradb-server/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "xngin-server" +name = "doradb-server" version = "0.1.0" edition = "2021" authors = ["jiangzhe "] @@ -7,7 +7,7 @@ license = "MIT OR Apache-2.0" description = "Server module of X-Engine" keywords = ["server", "mysql"] categories = ["database-implementations"] -repository = "https://github.com/jiangzhe/xngin/xngin-server/" +repository = "https://github.com/jiangzhe/doradb/doradb-server/" [dependencies] thiserror = "1.0" @@ -30,19 +30,19 @@ flume = "0.10" signal-hook = "0.3" thread_local = "1.1" scopeguard = "1.1" -xngin-storage = { version = "0.1.0", path = "../xngin-storage" } +doradb-storage = { version = "0.1.0", path = "../doradb-storage" } semistr = "0.1" fxd = "0.1" pin-project-lite = "0.2" 
-xngin-sql = { version = "0.1.0", path = "../xngin-sql" } -xngin-catalog = { version = "0.1.0", path = "../xngin-catalog" } -xngin-plan = { version = "0.1.0", path = "../xngin-plan" } -xngin-protocol = { version = "0.1.0", path = "../xngin-protocol" } -xngin-compute = { version = "0.1.0", path = "../xngin-compute" } +doradb-sql = { version = "0.1.0", path = "../doradb-sql" } +doradb-catalog = { version = "0.1.0", path = "../doradb-catalog" } +doradb-plan = { version = "0.1.0", path = "../doradb-plan" } +doradb-protocol = { version = "0.1.0", path = "../doradb-protocol" } +doradb-compute = { version = "0.1.0", path = "../doradb-compute" } [dev-dependencies] async-task = "4.2" env_logger = "0.10" -xngin-datatype = { version = "0.1.0", path = "../xngin-datatype" } -xngin-expr = { version = "0.1.0", path = "../xngin-expr" } +doradb-datatype = { version = "0.1.0", path = "../doradb-datatype" } +doradb-expr = { version = "0.1.0", path = "../doradb-expr" } diff --git a/xngin-server/src/cancel.rs b/doradb-server/src/cancel.rs similarity index 99% rename from xngin-server/src/cancel.rs rename to doradb-server/src/cancel.rs index e505a52..7e1a90a 100644 --- a/xngin-server/src/cancel.rs +++ b/doradb-server/src/cancel.rs @@ -7,7 +7,7 @@ use std::pin::Pin; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::task::{Context, Poll}; -use xngin_protocol::mysql::error::{Error, Result}; +use doradb_protocol::mysql::error::{Error, Result}; /// Cancellation represents a handle that can cancel future and stream /// processing. 
diff --git a/xngin-server/src/chan.rs b/doradb-server/src/chan.rs similarity index 94% rename from xngin-server/src/chan.rs rename to doradb-server/src/chan.rs index f37b959..d84a428 100644 --- a/xngin-server/src/chan.rs +++ b/doradb-server/src/chan.rs @@ -1,8 +1,8 @@ use crate::cancel::Cancellation; use flume::{Receiver, Sender}; use futures_lite::Stream; -use xngin_protocol::mysql::error::{Error, Result}; -use xngin_storage::block::Block; +use doradb_protocol::mysql::error::{Error, Result}; +use doradb_storage::block::Block; pub type ExecChannel = (InputChannel, OutputChannel); diff --git a/xngin-server/src/exec/builder.rs b/doradb-server/src/exec/builder.rs similarity index 81% rename from xngin-server/src/exec/builder.rs rename to doradb-server/src/exec/builder.rs index 4f6e129..3aa1ce7 100644 --- a/xngin-server/src/exec/builder.rs +++ b/doradb-server/src/exec/builder.rs @@ -1,8 +1,8 @@ use crate::exec::ExecPlan; -use xngin_plan::digraph::{DiGraph, NodeIndex}; -use xngin_plan::phy::{Phy, PhyKind, PhyPlan}; -use xngin_protocol::mysql::error::{Error, Result}; -// use xngin_storage::bitmap::{Bitmap, bitmap_u8s_set, bitmap_u8s_get}; +use doradb_plan::digraph::{DiGraph, NodeIndex}; +use doradb_plan::phy::{Phy, PhyKind, PhyPlan}; +use doradb_protocol::mysql::error::{Error, Result}; +// use doradb_storage::bitmap::{Bitmap, bitmap_u8s_set, bitmap_u8s_get}; use std::collections::VecDeque; pub struct ExecBuilder<'a> { diff --git a/xngin-server/src/exec/mod.rs b/doradb-server/src/exec/mod.rs similarity index 96% rename from xngin-server/src/exec/mod.rs rename to doradb-server/src/exec/mod.rs index f79605e..2d7ba3a 100644 --- a/xngin-server/src/exec/mod.rs +++ b/doradb-server/src/exec/mod.rs @@ -16,9 +16,9 @@ use futures_lite::Stream; pub use proj::ProjExec; use std::collections::VecDeque; use std::sync::Arc; -// use xngin_plan::phy::PhyPlan; -use xngin_protocol::mysql::error::Result; -use xngin_storage::block::Block; +// use doradb_plan::phy::PhyPlan; +use 
doradb_protocol::mysql::error::Result; +use doradb_storage::block::Block; pub struct ExecPlan { /// topology sorted executable nodes. @@ -179,12 +179,12 @@ mod tests { use crate::tests::single_thread_executor; use futures_lite::future; use futures_lite::StreamExt; - use xngin_compute::eval::QueryEvalPlan; - use xngin_datatype::PreciseType; - use xngin_expr::{ + use doradb_compute::eval::QueryEvalPlan; + use doradb_datatype::PreciseType; + use doradb_expr::{ Col, ColIndex, ColKind, Const, ExprKind, FuncKind, GlobalID, QueryID, TypeFix, TypeInferer, }; - use xngin_storage::attr::Attr; + use doradb_storage::attr::Attr; #[test] fn test_proj_exec() { diff --git a/xngin-server/src/exec/proj.rs b/doradb-server/src/exec/proj.rs similarity index 94% rename from xngin-server/src/exec/proj.rs rename to doradb-server/src/exec/proj.rs index 03ff1c9..036c955 100644 --- a/xngin-server/src/exec/proj.rs +++ b/doradb-server/src/exec/proj.rs @@ -3,9 +3,9 @@ use crate::exec::{ExecCtx, Executable, Work}; use async_trait::async_trait; use futures_lite::StreamExt; use std::sync::Arc; -use xngin_compute::eval::QueryEvalPlan; -use xngin_protocol::mysql::error::{Error, Result}; -use xngin_storage::block::Block; +use doradb_compute::eval::QueryEvalPlan; +use doradb_protocol::mysql::error::{Error, Result}; +use doradb_storage::block::Block; pub struct ProjExec { eval_plan: Arc, diff --git a/xngin-server/src/lib.rs b/doradb-server/src/lib.rs similarity index 100% rename from xngin-server/src/lib.rs rename to doradb-server/src/lib.rs diff --git a/xngin-server/src/mysql/mod.rs b/doradb-server/src/mysql/mod.rs similarity index 93% rename from xngin-server/src/mysql/mod.rs rename to doradb-server/src/mysql/mod.rs index 2c71587..eef01ef 100644 --- a/xngin-server/src/mysql/mod.rs +++ b/doradb-server/src/mysql/mod.rs @@ -9,9 +9,9 @@ use easy_parallel::Parallel; use flume::{Receiver, Sender}; use std::sync::atomic::AtomicU32; use std::sync::Arc; -use xngin_catalog::Catalog; -use 
xngin_protocol::mysql::error::{Error, Result}; -use xngin_protocol::mysql::ServerSpec; +use doradb_catalog::Catalog; +use doradb_protocol::mysql::error::{Error, Result}; +use doradb_protocol::mysql::ServerSpec; const DEFAULT_SERVER_THREADS: usize = 1; @@ -79,9 +79,9 @@ mod tests { use async_io::{block_on, Timer}; use std::thread; use std::time::Duration; - use xngin_catalog::mem_impl::MemCatalog; - use xngin_protocol::buf::ByteBuffer; - use xngin_protocol::mysql::conn::TcpClientOpts; + use doradb_catalog::mem_impl::MemCatalog; + use doradb_protocol::buf::ByteBuffer; + use doradb_protocol::mysql::conn::TcpClientOpts; #[test] fn test_server_start() -> Result<()> { diff --git a/xngin-server/src/mysql/serve_tcp.rs b/doradb-server/src/mysql/serve_tcp.rs similarity index 91% rename from xngin-server/src/mysql/serve_tcp.rs rename to doradb-server/src/mysql/serve_tcp.rs index 95a024d..43fe061 100644 --- a/xngin-server/src/mysql/serve_tcp.rs +++ b/doradb-server/src/mysql/serve_tcp.rs @@ -4,10 +4,10 @@ use async_executor::Executor; use async_net::TcpListener; use std::sync::atomic::Ordering; use std::sync::Arc; -use xngin_catalog::Catalog; -use xngin_protocol::mysql::conn::{Buf, MyConn}; -use xngin_protocol::mysql::error::Result; -use xngin_protocol::mysql::serde::{SerdeCtx, SerdeMode}; +use doradb_catalog::Catalog; +use doradb_protocol::mysql::conn::{Buf, MyConn}; +use doradb_protocol::mysql::error::Result; +use doradb_protocol::mysql::serde::{SerdeCtx, SerdeMode}; const DEFAULT_READ_BUF_SIZE: usize = 16 * 1024; const DEFAULT_WRITE_BUF_SIZE: usize = 16 * 1024; diff --git a/xngin-server/src/mysql/session.rs b/doradb-server/src/mysql/session.rs similarity index 87% rename from xngin-server/src/mysql/session.rs rename to doradb-server/src/mysql/session.rs index 055ca2c..38da1b1 100644 --- a/xngin-server/src/mysql/session.rs +++ b/doradb-server/src/mysql/session.rs @@ -1,18 +1,18 @@ use crate::mysql::MySQLServer; use futures_lite::{AsyncRead, AsyncWrite}; -use 
xngin_catalog::Catalog; -use xngin_plan::lgc::LgcPlan; -// use xngin_plan::phy::PhyPlan; -use xngin_protocol::buf::ByteBuffer; -use xngin_protocol::mysql::cmd::MyCmd; -use xngin_protocol::mysql::conn::MyConn; -use xngin_protocol::mysql::error::{Error, Result}; -use xngin_protocol::mysql::flag::StatusFlags; -use xngin_protocol::mysql::principal::Principal; -use xngin_protocol::mysql::serde::MyDeser; -use xngin_sql::ast::{Query, Statement}; -use xngin_sql::parser::dialect::MySQL; -use xngin_sql::parser::parse_stmt; +use doradb_catalog::Catalog; +use doradb_plan::lgc::LgcPlan; +// use doradb_plan::phy::PhyPlan; +use doradb_protocol::buf::ByteBuffer; +use doradb_protocol::mysql::cmd::MyCmd; +use doradb_protocol::mysql::conn::MyConn; +use doradb_protocol::mysql::error::{Error, Result}; +use doradb_protocol::mysql::flag::StatusFlags; +use doradb_protocol::mysql::principal::Principal; +use doradb_protocol::mysql::serde::MyDeser; +use doradb_sql::ast::{Query, Statement}; +use doradb_sql::parser::dialect::MySQL; +use doradb_sql::parser::parse_stmt; pub struct Session<'a, C: Catalog, T> { server: &'a MySQLServer, conn: MyConn, diff --git a/xngin-server/src/mysql/signal.rs b/doradb-server/src/mysql/signal.rs similarity index 90% rename from xngin-server/src/mysql/signal.rs rename to doradb-server/src/mysql/signal.rs index 9839b11..cbbc365 100644 --- a/xngin-server/src/mysql/signal.rs +++ b/doradb-server/src/mysql/signal.rs @@ -2,7 +2,7 @@ use flume::Sender; use signal_hook::consts::*; use signal_hook::iterator::Signals; use std::thread; -use xngin_protocol::mysql::error::Result; +use doradb_protocol::mysql::error::Result; #[inline] pub fn subscribe_stop_signal(tx: Sender<()>) -> Result<()> { diff --git a/xngin-sql/Cargo.toml b/doradb-sql/Cargo.toml similarity index 83% rename from xngin-sql/Cargo.toml rename to doradb-sql/Cargo.toml index 71f6018..47b5470 100644 --- a/xngin-sql/Cargo.toml +++ b/doradb-sql/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "xngin-sql" +name = 
"doradb-sql" version = "0.1.0" edition = "2021" authors = ["jiangzhe "] @@ -7,7 +7,7 @@ license = "MIT OR Apache-2.0" description = "SQL parser of X-Engine" keywords = ["sql", "parser", "nom"] categories = ["database-implementations", "parser-implementations"] -repository = "https://github.com/jiangzhe/xngin/xngin-sql/" +repository = "https://github.com/jiangzhe/doradb/doradb-sql/" [dependencies] nom = "7" diff --git a/xngin-sql/src/ast.rs b/doradb-sql/src/ast.rs similarity index 100% rename from xngin-sql/src/ast.rs rename to doradb-sql/src/ast.rs diff --git a/xngin-sql/src/error.rs b/doradb-sql/src/error.rs similarity index 100% rename from xngin-sql/src/error.rs rename to doradb-sql/src/error.rs diff --git a/xngin-sql/src/lib.rs b/doradb-sql/src/lib.rs similarity index 100% rename from xngin-sql/src/lib.rs rename to doradb-sql/src/lib.rs diff --git a/xngin-sql/src/macros.rs b/doradb-sql/src/macros.rs similarity index 100% rename from xngin-sql/src/macros.rs rename to doradb-sql/src/macros.rs diff --git a/xngin-sql/src/parser/ddl.rs b/doradb-sql/src/parser/ddl.rs similarity index 100% rename from xngin-sql/src/parser/ddl.rs rename to doradb-sql/src/parser/ddl.rs diff --git a/xngin-sql/src/parser/dialect.rs b/doradb-sql/src/parser/dialect.rs similarity index 100% rename from xngin-sql/src/parser/dialect.rs rename to doradb-sql/src/parser/dialect.rs diff --git a/xngin-sql/src/parser/dml.rs b/doradb-sql/src/parser/dml.rs similarity index 100% rename from xngin-sql/src/parser/dml.rs rename to doradb-sql/src/parser/dml.rs diff --git a/xngin-sql/src/parser/expr.rs b/doradb-sql/src/parser/expr.rs similarity index 100% rename from xngin-sql/src/parser/expr.rs rename to doradb-sql/src/parser/expr.rs diff --git a/xngin-sql/src/parser/expr/tests.rs b/doradb-sql/src/parser/expr/tests.rs similarity index 100% rename from xngin-sql/src/parser/expr/tests.rs rename to doradb-sql/src/parser/expr/tests.rs diff --git a/xngin-sql/src/parser/mod.rs b/doradb-sql/src/parser/mod.rs 
similarity index 100% rename from xngin-sql/src/parser/mod.rs rename to doradb-sql/src/parser/mod.rs diff --git a/xngin-sql/src/parser/query.rs b/doradb-sql/src/parser/query.rs similarity index 100% rename from xngin-sql/src/parser/query.rs rename to doradb-sql/src/parser/query.rs diff --git a/xngin-sql/src/parser/util.rs b/doradb-sql/src/parser/util.rs similarity index 100% rename from xngin-sql/src/parser/util.rs rename to doradb-sql/src/parser/util.rs diff --git a/xngin-sql/src/pretty.rs b/doradb-sql/src/pretty.rs similarity index 100% rename from xngin-sql/src/pretty.rs rename to doradb-sql/src/pretty.rs diff --git a/xngin-sql/tests/pretty.rs b/doradb-sql/tests/pretty.rs similarity index 90% rename from xngin-sql/tests/pretty.rs rename to doradb-sql/tests/pretty.rs index 687dea7..8775486 100644 --- a/xngin-sql/tests/pretty.rs +++ b/doradb-sql/tests/pretty.rs @@ -1,6 +1,6 @@ -use xngin_sql::parser::dialect::MySQL; -use xngin_sql::parser::parse_query_verbose; -use xngin_sql::pretty::{PrettyConf, PrettyFormat}; +use doradb_sql::parser::dialect::MySQL; +use doradb_sql::parser::parse_query_verbose; +use doradb_sql::pretty::{PrettyConf, PrettyFormat}; macro_rules! 
check_sql { ($filename:literal) => { diff --git a/xngin-storage/Cargo.toml b/doradb-storage/Cargo.toml similarity index 71% rename from xngin-storage/Cargo.toml rename to doradb-storage/Cargo.toml index 20c41f1..daca2c2 100644 --- a/xngin-storage/Cargo.toml +++ b/doradb-storage/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "xngin-storage" +name = "doradb-storage" version = "0.1.0" edition = "2021" authors = ["jiangzhe "] @@ -7,10 +7,10 @@ license = "MIT OR Apache-2.0" description = "Storage module of X-Engine" keywords = ["database", "encoding"] categories = ["database-implementations"] -repository = "https://github.com/jiangzhe/xngin/xngin-storage/" +repository = "https://github.com/jiangzhe/doradb/doradb-storage/" [dependencies] -xngin-datatype = { version = "0.1.0", path = "../xngin-datatype" } +doradb-datatype = { version = "0.1.0", path = "../doradb-datatype" } smallvec = {version = "1.8", features = ["union"]} thiserror = "1.0" bitflags = "1.3" diff --git a/xngin-storage/src/alloc.rs b/doradb-storage/src/alloc.rs similarity index 100% rename from xngin-storage/src/alloc.rs rename to doradb-storage/src/alloc.rs diff --git a/xngin-storage/src/array.rs b/doradb-storage/src/array.rs similarity index 100% rename from xngin-storage/src/array.rs rename to doradb-storage/src/array.rs diff --git a/xngin-storage/src/attr.rs b/doradb-storage/src/attr.rs similarity index 99% rename from xngin-storage/src/attr.rs rename to doradb-storage/src/attr.rs index a9f5ef8..baae3b5 100644 --- a/xngin-storage/src/attr.rs +++ b/doradb-storage/src/attr.rs @@ -10,7 +10,7 @@ use bitflags::bitflags; use smallvec::SmallVec; use std::io; use std::sync::Arc; -use xngin_datatype::{PreciseType, StaticTyped}; +use doradb_datatype::{PreciseType, StaticTyped}; // attribute header level offset const ATTR_HDR_OFFSET_START_FMT: usize = 0; diff --git a/xngin-storage/src/bitmap.rs b/doradb-storage/src/bitmap.rs similarity index 100% rename from xngin-storage/src/bitmap.rs rename to 
doradb-storage/src/bitmap.rs diff --git a/xngin-storage/src/block.rs b/doradb-storage/src/block.rs similarity index 99% rename from xngin-storage/src/block.rs rename to doradb-storage/src/block.rs index 3c21a16..db4144a 100644 --- a/xngin-storage/src/block.rs +++ b/doradb-storage/src/block.rs @@ -196,7 +196,7 @@ mod tests { use crate::sel::Sel; use std::io::Cursor; use std::sync::Arc; - use xngin_datatype::PreciseType; + use doradb_datatype::PreciseType; #[test] fn test_block_single_store_and_load() { diff --git a/xngin-storage/src/codec/mod.rs b/doradb-storage/src/codec/mod.rs similarity index 100% rename from xngin-storage/src/codec/mod.rs rename to doradb-storage/src/codec/mod.rs diff --git a/xngin-storage/src/codec/single.rs b/doradb-storage/src/codec/single.rs similarity index 100% rename from xngin-storage/src/codec/single.rs rename to doradb-storage/src/codec/single.rs diff --git a/xngin-storage/src/error.rs b/doradb-storage/src/error.rs similarity index 95% rename from xngin-storage/src/error.rs rename to doradb-storage/src/error.rs index b8c339d..45fa036 100644 --- a/xngin-storage/src/error.rs +++ b/doradb-storage/src/error.rs @@ -1,6 +1,6 @@ use std::array::TryFromSliceError; use thiserror::Error; -use xngin_datatype::error::Error as DataTypeError; +use doradb_datatype::error::Error as DataTypeError; pub type Result = std::result::Result; diff --git a/xngin-storage/src/lib.rs b/doradb-storage/src/lib.rs similarity index 100% rename from xngin-storage/src/lib.rs rename to doradb-storage/src/lib.rs diff --git a/xngin-storage/src/repr.rs b/doradb-storage/src/repr.rs similarity index 100% rename from xngin-storage/src/repr.rs rename to doradb-storage/src/repr.rs diff --git a/xngin-storage/src/sel.rs b/doradb-storage/src/sel.rs similarity index 99% rename from xngin-storage/src/sel.rs rename to doradb-storage/src/sel.rs index a60ec1c..041e859 100644 --- a/xngin-storage/src/sel.rs +++ b/doradb-storage/src/sel.rs @@ -6,7 +6,7 @@ use crate::error::{Error, 
Result}; use smallvec::SmallVec; use std::collections::BTreeSet; use std::sync::Arc; -use xngin_datatype::PreciseType; +use doradb_datatype::PreciseType; /// Sel encodes filter indexes into bitmap, single or none. #[derive(Debug)] diff --git a/xngin-storage/src/slice_ext.rs b/doradb-storage/src/slice_ext.rs similarity index 100% rename from xngin-storage/src/slice_ext.rs rename to doradb-storage/src/slice_ext.rs diff --git a/xngin-storage/src/sma.rs b/doradb-storage/src/sma.rs similarity index 100% rename from xngin-storage/src/sma.rs rename to doradb-storage/src/sma.rs diff --git a/doradb-tpch-tests/Cargo.toml b/doradb-tpch-tests/Cargo.toml new file mode 100644 index 0000000..951f673 --- /dev/null +++ b/doradb-tpch-tests/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "doradb-tpch-tests" +version = "0.1.0" +edition = "2021" +authors = ["jiangzhe "] +license = "MIT OR Apache-2.0" +description = "TPCH test cases of X-Engine" +repository = "https://github.com/jiangzhe/doradb/doradb-tpch-tests/" + +[dependencies] +doradb-datatype = { version = "0.1.0", path = "../doradb-datatype" } +doradb-catalog = { version = "0.1.0", path = "../doradb-catalog" } +doradb-sql = { version = "0.1.0", path = "../doradb-sql" } +doradb-plan = { version = "0.1.0", path = "../doradb-plan" } +aosa = "0.1" diff --git a/xngin-tpch-tests/src/lib.rs b/doradb-tpch-tests/src/lib.rs similarity index 98% rename from xngin-tpch-tests/src/lib.rs rename to doradb-tpch-tests/src/lib.rs index 288f7a8..f02827e 100644 --- a/xngin-tpch-tests/src/lib.rs +++ b/doradb-tpch-tests/src/lib.rs @@ -1,6 +1,6 @@ -use xngin_catalog::mem_impl::MemCatalog; -use xngin_catalog::{Catalog, ColumnAttr, ColumnSpec, TableSpec}; -use xngin_datatype::PreciseType; +use doradb_catalog::mem_impl::MemCatalog; +use doradb_catalog::{Catalog, ColumnAttr, ColumnSpec, TableSpec}; +use doradb_datatype::PreciseType; #[inline] pub fn tpch_catalog() -> MemCatalog { diff --git a/xngin-tpch-tests/tests/build_lgc.rs 
b/doradb-tpch-tests/tests/build_lgc.rs similarity index 90% rename from xngin-tpch-tests/tests/build_lgc.rs rename to doradb-tpch-tests/tests/build_lgc.rs index 251f0ee..3ab113f 100644 --- a/xngin-tpch-tests/tests/build_lgc.rs +++ b/doradb-tpch-tests/tests/build_lgc.rs @@ -1,8 +1,8 @@ -use xngin_plan::explain::{Explain, ExplainConf}; -use xngin_plan::lgc::LgcPlan; -use xngin_sql::parser::dialect::Ansi; -use xngin_sql::parser::parse_query_verbose; -use xngin_tpch_tests::tpch_catalog; +use doradb_plan::explain::{Explain, ExplainConf}; +use doradb_plan::lgc::LgcPlan; +use doradb_sql::parser::dialect::Ansi; +use doradb_sql::parser::parse_query_verbose; +use doradb_tpch_tests::tpch_catalog; macro_rules! check_build { ( $filename:literal ) => { diff --git a/xngin-tpch-tests/tests/parse.rs b/doradb-tpch-tests/tests/parse.rs similarity index 99% rename from xngin-tpch-tests/tests/parse.rs rename to doradb-tpch-tests/tests/parse.rs index 98f69fd..cb6e1b6 100644 --- a/xngin-tpch-tests/tests/parse.rs +++ b/doradb-tpch-tests/tests/parse.rs @@ -1,7 +1,7 @@ use std::ops::{Add, Div, Mul, Sub}; -use xngin_sql::ast::*; -use xngin_sql::parser::dialect::{Ansi, MySQL}; -use xngin_sql::parser::{parse_multi_stmts, parse_query_verbose}; +use doradb_sql::ast::*; +use doradb_sql::parser::dialect::{Ansi, MySQL}; +use doradb_sql::parser::{parse_multi_stmts, parse_query_verbose}; macro_rules! 
col { ( $($lit:literal).* ) => { diff --git a/xngin-tpch-tests/tests/reflect.rs b/doradb-tpch-tests/tests/reflect.rs similarity index 90% rename from xngin-tpch-tests/tests/reflect.rs rename to doradb-tpch-tests/tests/reflect.rs index 03785bb..338bfc7 100644 --- a/xngin-tpch-tests/tests/reflect.rs +++ b/doradb-tpch-tests/tests/reflect.rs @@ -1,10 +1,10 @@ use aosa::StringArena; -use xngin_plan::lgc::reflect; -use xngin_plan::lgc::LgcPlan; -use xngin_sql::parser::dialect::Ansi; -use xngin_sql::parser::parse_query_verbose; -use xngin_sql::pretty::{PrettyConf, PrettyFormat}; -use xngin_tpch_tests::tpch_catalog; +use doradb_plan::lgc::reflect; +use doradb_plan::lgc::LgcPlan; +use doradb_sql::parser::dialect::Ansi; +use doradb_sql::parser::parse_query_verbose; +use doradb_sql::pretty::{PrettyConf, PrettyFormat}; +use doradb_tpch_tests::tpch_catalog; macro_rules! check_reflect { ( $filename:literal ) => { diff --git a/xngin-tpch-tests/tests/rule_optimize.rs b/doradb-tpch-tests/tests/rule_optimize.rs similarity index 94% rename from xngin-tpch-tests/tests/rule_optimize.rs rename to doradb-tpch-tests/tests/rule_optimize.rs index aba0224..c3d113e 100644 --- a/xngin-tpch-tests/tests/rule_optimize.rs +++ b/doradb-tpch-tests/tests/rule_optimize.rs @@ -1,11 +1,11 @@ use std::time::Instant; -use xngin_catalog::Catalog; -use xngin_plan::explain::{Explain, ExplainConf}; -use xngin_plan::lgc::LgcPlan; -use xngin_plan::rule::rule_optimize; -use xngin_sql::parser::dialect::Ansi; -use xngin_sql::parser::parse_query_verbose; -use xngin_tpch_tests::tpch_catalog; +use doradb_catalog::Catalog; +use doradb_plan::explain::{Explain, ExplainConf}; +use doradb_plan::lgc::LgcPlan; +use doradb_plan::rule::rule_optimize; +use doradb_sql::parser::dialect::Ansi; +use doradb_sql::parser::parse_query_verbose; +use doradb_tpch_tests::tpch_catalog; #[test] fn test_tpch_rule_optimize1() { diff --git a/xngin-plan/Cargo.toml b/xngin-plan/Cargo.toml deleted file mode 100644 index e355281..0000000 --- 
a/xngin-plan/Cargo.toml +++ /dev/null @@ -1,27 +0,0 @@ -[package] -name = "xngin-plan" -version = "0.1.0" -edition = "2021" -authors = ["jiangzhe "] -license = "MIT OR Apache-2.0" -description = "Planner module of X-Engine" -keywords = ["query", "planning", "optimization"] -categories = ["database-implementations"] -repository = "https://github.com/jiangzhe/xngin/xngin-plan/" - -[dependencies] -xngin-datatype = { version = "0.1.0", path = "../xngin-datatype" } -xngin-catalog = { version = "0.1.0", path = "../xngin-catalog" } -xngin-sql = { version = "0.1.0", path = "../xngin-sql" } -xngin-expr = { version = "0.1.0", path = "../xngin-expr" } -xngin-storage = { version = "0.1.0", path = "../xngin-storage" } -xngin-compute = { version = "0.1.0", path = "../xngin-compute" } -aosa = "0.1" -slab = "0.4" -thiserror = "1.0" -semistr = "0.1" -fnv = "1.0" -indexmap = "1.7" -static_init = "1.0" -smallvec = {version = "1.8", features = ["union"]} -bitflags = "1.3" \ No newline at end of file diff --git a/xngin-plan/src/rule/output_fix.rs b/xngin-plan/src/rule/output_fix.rs deleted file mode 100644 index 2d0d25e..0000000 --- a/xngin-plan/src/rule/output_fix.rs +++ /dev/null @@ -1,135 +0,0 @@ -use crate::lgc::{QuerySet, Location}; -use crate::lgc::{Op, OpKind, OpVisitor, OpMutVisitor, ProjCol}; -use crate::error::{Error, Result}; -use xngin_expr::QueryID; -use xngin_datatype::PreciseType; -use std::sync::Arc; -use std::collections::HashMap; -use std::collections::hash_map::Entry; -use std::ops::ControlFlow; - -/// Output fix is one step at end of the logical plan optimization. -/// It removes all unnecessary projection nodes and fix output of -/// each other nodes. -/// -/// We need to take care of following cases. -/// -/// 1. invisible column. -/// -/// Example sql: "SELECT c0 FROM t1 ORDER BY c1". -/// The sort operator at top requires `c1` but projection operator only -/// output `c0`, consistent with output of the whole plan. 
-/// In such case, we need to add an *invisible* output `c1` to projection -/// to make sure sort has enough data. -/// If all operators support projection, we can remove projection nodes -/// and make the plan compact. -/// -/// 2. aggregation. -/// -/// Example sql: "SELECT c0, SUM(c1) + c0 FROM t1 GROUP BY c0". -/// If we separate the calculation into projection and aggregation, -/// we can process `c0` and `SUM(c1)` in aggregation, then perform -/// the addition in projection. -/// If we want to remove projection, we have to let the output stage -/// of aggregation handle the addition. -/// -/// The basic steps are: -/// 1. Find output columns of top query. -/// 2. Set output columns to root operator. -/// 3. From root to leaf, set output according to each operator's behavior. -#[inline] -pub fn output_fix(qry_set: &mut QuerySet, qry_id: QueryID) -> Result<()> { - if let Some(subq) = qry_set.get_mut(&qry_id) { - let out_cols: Vec = subq.out_cols().iter().map(|c| c.clone()).collect(); - fix_output(qry_set, qry_id, Arc::new(out_cols)) - } else { - Err(Error::QueryNotFound(qry_id)) - } -} - -#[inline] -fn fix_output(qry_set: &mut QuerySet, qry_id: QueryID, output: Arc>) -> Result<()> { - todo!() -} - -/// collect expression of first projection/aggregation/row operator and try to -/// collapse those in common in other nodes. -struct CollectTopDown { - out: Vec, - // out map stores output expression other than column. 
- out_map: HashMap, - out_inited: bool, -} - -struct CollectBottomUp { - -} - -// impl OpVisitor for Collect { -// type Cont = (); -// type Break = Error; -// #[inline] -// fn leave(&mut self, op: &Op) -> ControlFlow { -// match &op.kind { -// OpKind::Query(_) => (), -// OpKind::Proj{cols, ..} => { -// let cols = cols.as_ref().unwrap(); -// if !self.out_fixed { -// self.out.extend_from_slice(cols); -// self.out_fixed = true; -// } -// for c in cols { -// if c.expr.kind.is_col() { - -// } -// } -// } -// } -// } -// } - -struct Fix { - input: Vec, - output: Vec<(usize, PreciseType)>, - // duplicate expressions should be only kept one copy, - // others can be derived from the first one. - expr_map: HashMap, -} - -impl Fix { - #[inline] - fn new(out_cols: Vec) -> Self { - // let mut expr_map = HashMap::with_capacity(out_cols.len()); - // for c in out_cols.into_iter() { - // match expr_map.entry(c.expr) { - // Entry::Occupied(occ) => { - // // duplicate expression found, we only need child to output the first one - // // and make copy of it. - // // e.g. 
[a, b, b, c] => input=[a, b, c], index=[0, 1, 1, 2] - // let output = (*occ.get(), occ.key().ty); - - // } - // } - // } - todo!() - } -} - -impl OpMutVisitor for Fix { - type Cont = (); - type Break = Error; - #[inline] - fn enter(&mut self, op: &mut Op) -> ControlFlow { - // match &mut op.kind { - // OpKind::Proj { cols, input } => { - // if let Some(cols) = cols.take() { - // // check it - // } - // } - // OpKind::Sort { items, limit, input } => { - - // } - // } - todo!() - } -} diff --git a/xngin-plan/src/rule/pred_move.rs b/xngin-plan/src/rule/pred_move.rs deleted file mode 100644 index 070cfab..0000000 --- a/xngin-plan/src/rule/pred_move.rs +++ /dev/null @@ -1,35 +0,0 @@ -use crate::error::Result; -use crate::lgc::QuerySet; -use crate::rule::assign_id::assign_id; -use std::collections::{HashMap, HashSet}; -use xngin_expr::{ExprKind, GlobalID, QueryID}; - -/// Predicate Movearound -/// -/// This is a combination of prediate pushdown(PPD) and predicate pullup(PPU). -/// There are two important components to move around predicates. -/// -/// 1. sets of columns. -/// An equal set contains columns that are specified equal in join conditions. -/// -/// Example 1: "SELECT * FROM t1 JOIN t2 ON t1.c1 = t2.c2" -/// Equal set: {t1.c1, t2.c2} -/// -/// Example 2: "SELECT * FROM t1 JOIN t2 ON t1.c1 = t2.c2 JOIN t3 ON t1.c1 = t3.c3" -/// Equal set: {t1.c1, t2.c2, t3.c3} -/// -/// Example 3: "SELECT * FROM t1 LEFT JOIN t2 ON t1.c1 = t2.c2" -/// Equal set: {}, but predicate can be propagated from t1.c1 to t2.c2. -/// -/// 2. sets of filter expressions. 
-/// New expressions can be derived from existing expressions and equal set of columns -/// -/// Example 1: "SELECT * FROM t1 JOIN t2 ON t1.c1 = t2.c2 WHERE t1.c1 > 0" -/// exprs: t1.c1 > 0 -/// derived exprs: t2.c2 > 0 -/// -#[inline] -pub fn pred_move(qry_set: &mut QuerySet, qry_id: QueryID) -> Result<()> { - assign_id(qry_set, qry_id)?; - todo!() -} diff --git a/xngin-tpch-tests/Cargo.toml b/xngin-tpch-tests/Cargo.toml deleted file mode 100644 index d0c4409..0000000 --- a/xngin-tpch-tests/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[package] -name = "xngin-tpch-tests" -version = "0.1.0" -edition = "2021" -authors = ["jiangzhe "] -license = "MIT OR Apache-2.0" -description = "TPCH test cases of X-Engine" -repository = "https://github.com/jiangzhe/xngin/xngin-tpch-tests/" - -[dependencies] -xngin-datatype = { version = "0.1.0", path = "../xngin-datatype" } -xngin-catalog = { version = "0.1.0", path = "../xngin-catalog" } -xngin-sql = { version = "0.1.0", path = "../xngin-sql" } -xngin-plan = { version = "0.1.0", path = "../xngin-plan" } -aosa = "0.1"