Skip to content

Commit

Permalink
feat(cost-model): Initial framework of compute cost migration (#31)
Browse files Browse the repository at this point in the history
* First draft of migration

* add storage part

* 1. add compute_cost 2. add todo documentation 3. improve type like tableid 4. CMSL->S
  • Loading branch information
lanlou1554 authored Nov 14, 2024
1 parent db8829d commit 21d01ae
Show file tree
Hide file tree
Showing 20 changed files with 4,389 additions and 0 deletions.
3,678 changes: 3,678 additions & 0 deletions optd-cost-model/Cargo.lock

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions optd-cost-model/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[package]
name = "optd-cost-model"
version = "0.1.0"
edition = "2021"

[dependencies]
optd-persistent = { path = "../optd-persistent", version = "0.1" }
serde = { version = "1.0", features = ["derive"] }
arrow-schema = "53.2.0"
datafusion-expr = "32.0.0"
ordered-float = "4.0"
chrono = "0.4"

4 changes: 4 additions & 0 deletions optd-cost-model/src/common/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/// Join, physical-node and predicate node types (`JoinType`, `PhysicalNodeType`,
/// `PredicateType`, `PredicateNode`).
pub mod nodes;
/// Predicate type tags: binary/unary/logical operators, constants, functions
/// and sort order.
pub mod predicates;
/// Newtype identifiers (`GroupId`, `ExprId`, `TableId`, `AttrId`, `EpochId`).
pub mod types;
/// Provides the `Value` type used as the optional payload of a `PredicateNode`.
pub mod values;
96 changes: 96 additions & 0 deletions optd-cost-model/src/common/nodes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
use std::sync::Arc;

use arrow_schema::DataType;

use super::{
predicates::{
bin_op_pred::BinOpType, constant_pred::ConstantType, func_pred::FuncType,
log_op_pred::LogOpType, sort_order_pred::SortOrderType, un_op_pred::UnOpType,
},
values::Value,
};

/// The kind of join carried by the join variants of [`PhysicalNodeType`]
/// (`PhysicalHashJoin` and `PhysicalNestedLoopJoin`).
///
/// Only `Inner` has an explicit discriminant (1); the remaining variants take
/// consecutive values in declaration order (`FullOuter` = 2, ..., `RightAnti` = 9).
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum JoinType {
Inner = 1,
FullOuter,
LeftOuter,
RightOuter,
Cross,
LeftSemi,
RightSemi,
LeftAnti,
RightAnti,
}

/// The kind of a physical plan node.
///
/// The two join variants (`PhysicalHashJoin`, `PhysicalNestedLoopJoin`) carry
/// the [`JoinType`] of the join; every other variant is a plain tag.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PhysicalNodeType {
    PhysicalProjection,
    PhysicalFilter,
    PhysicalScan,
    PhysicalSort,
    PhysicalAgg,
    PhysicalHashJoin(JoinType),
    PhysicalNestedLoopJoin(JoinType),
    PhysicalEmptyRelation,
    PhysicalLimit,
}

/// Displays the node type using its derived `Debug` representation.
impl std::fmt::Display for PhysicalNodeType {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(formatter, "{self:?}")
    }
}

/// The kind of a predicate node, stored in [`PredicateNode::typ`].
///
/// Several variants carry a more specific tag: the constant type, the
/// unary/binary/logical operator, the function kind, the sort order, or an
/// Arrow `DataType`. The remaining variants are plain tags.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum PredicateType {
    List,
    Constant(ConstantType),
    AttributeRef,
    ExternAttributeRef,
    UnOp(UnOpType),
    BinOp(BinOpType),
    LogOp(LogOpType),
    Func(FuncType),
    SortOrder(SortOrderType),
    Between,
    Cast,
    Like,
    DataType(DataType),
    InList,
}

/// Displays the predicate type using its derived `Debug` representation.
impl std::fmt::Display for PredicateType {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(formatter, "{self:?}")
    }
}

/// A reference-counted, shareable [`PredicateNode`].
pub type ArcPredicateNode = Arc<PredicateNode>;

/// A node of a predicate tree: a type tag, its child nodes and an optional
/// data payload.
///
/// NOTE(review): children are stored by value even though the
/// `ArcPredicateNode` alias is declared right above this struct — confirm
/// whether `Vec<ArcPredicateNode>` was intended.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct PredicateNode {
    /// A generic predicate node type
    pub typ: PredicateType,
    /// Child predicate nodes, always materialized
    pub children: Vec<PredicateNode>,
    /// Data associated with the predicate, if any
    pub data: Option<Value>,
}

/// Renders the node as a parenthesised, space-separated list:
/// `(<typ> <child>... <data>)`, where children are rendered recursively and
/// the data part is omitted when `data` is `None`.
impl std::fmt::Display for PredicateNode {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(formatter, "({}", self.typ)?;
        // Children come first, each preceded by a single space.
        self.children
            .iter()
            .try_for_each(|node| write!(formatter, " {}", node))?;
        // The payload, when present, is printed after all children.
        if let Some(payload) = self.data.as_ref() {
            write!(formatter, " {}", payload)?;
        }
        formatter.write_str(")")
    }
}
40 changes: 40 additions & 0 deletions optd-cost-model/src/common/predicates/bin_op_pred.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/// The operator of a binary-operator predicate.
///
/// Variants fall into two groups: numerical operators (`Add`, `Sub`, `Mul`,
/// `Div`, `Mod`) and comparison operators (`Eq`, `Neq`, `Gt`, `Lt`, `Geq`,
/// `Leq`). See [`BinOpType::is_numerical`] and [`BinOpType::is_comparison`].
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum BinOpType {
    // numerical
    Add,
    Sub,
    Mul,
    Div,
    Mod,

    // comparison
    Eq,
    Neq,
    Gt,
    Lt,
    Geq,
    Leq,
}

/// Displays the operator using its derived `Debug` representation
/// (i.e. the variant name).
impl std::fmt::Display for BinOpType {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(formatter, "{self:?}")
    }
}

impl BinOpType {
    /// Returns `true` for the numerical operators
    /// (`Add`, `Sub`, `Mul`, `Div`, `Mod`) and `false` otherwise.
    pub fn is_numerical(&self) -> bool {
        match self {
            Self::Add | Self::Sub | Self::Mul | Self::Div | Self::Mod => true,
            Self::Eq | Self::Neq | Self::Gt | Self::Lt | Self::Geq | Self::Leq => false,
        }
    }

    /// Returns `true` for the comparison operators
    /// (`Eq`, `Neq`, `Gt`, `Lt`, `Geq`, `Leq`) and `false` otherwise.
    pub fn is_comparison(&self) -> bool {
        match self {
            Self::Eq | Self::Neq | Self::Gt | Self::Lt | Self::Geq | Self::Leq => true,
            Self::Add | Self::Sub | Self::Mul | Self::Div | Self::Mod => false,
        }
    }
}
21 changes: 21 additions & 0 deletions optd-cost-model/src/common/predicates/constant_pred.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
use serde::{Deserialize, Serialize};

/// The type tag of a constant predicate, carried by `PredicateType::Constant`.
///
/// Derives serde's `Serialize` and `Deserialize` in addition to the usual
/// comparison/hash traits.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, Serialize, Deserialize)]
pub enum ConstantType {
Bool,
Utf8String,
UInt8,
UInt16,
UInt32,
UInt64,
Int8,
Int16,
Int32,
Int64,
Float64,
Date,
IntervalMonthDateNano,
Decimal,
Binary,
}
23 changes: 23 additions & 0 deletions optd-cost-model/src/common/predicates/func_pred.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/// The kind of function referenced by a function predicate: a DataFusion
/// built-in scalar function, a DataFusion aggregate function, or `Case`.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum FuncType {
    Scalar(datafusion_expr::BuiltinScalarFunction),
    Agg(datafusion_expr::AggregateFunction),
    Case,
}

/// Displays the function type using its derived `Debug` representation.
impl std::fmt::Display for FuncType {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(formatter, "{self:?}")
    }
}

impl FuncType {
    /// Wraps a DataFusion built-in scalar function as [`FuncType::Scalar`].
    pub fn new_scalar(func_id: datafusion_expr::BuiltinScalarFunction) -> Self {
        Self::Scalar(func_id)
    }

    /// Wraps a DataFusion aggregate function as [`FuncType::Agg`].
    pub fn new_agg(func_id: datafusion_expr::AggregateFunction) -> Self {
        Self::Agg(func_id)
    }
}
14 changes: 14 additions & 0 deletions optd-cost-model/src/common/predicates/log_op_pred.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use std::fmt::Display;

/// The operator of a logical-operator predicate: `And` or `Or`.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum LogOpType {
    And,
    Or,
}

/// Displays the operator using its derived `Debug` representation
/// (i.e. the variant name).
impl Display for LogOpType {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(formatter, "{self:?}")
    }
}
6 changes: 6 additions & 0 deletions optd-cost-model/src/common/predicates/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/// Binary operator types (`BinOpType`).
pub mod bin_op_pred;
/// Constant value types (`ConstantType`).
pub mod constant_pred;
/// Function types (`FuncType`).
pub mod func_pred;
/// Logical operator types (`LogOpType`).
pub mod log_op_pred;
/// Sort order types (`SortOrderType`).
pub mod sort_order_pred;
/// Unary operator types (`UnOpType`).
pub mod un_op_pred;
14 changes: 14 additions & 0 deletions optd-cost-model/src/common/predicates/sort_order_pred.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use std::fmt::Display;

/// The direction of a sort-order predicate: ascending (`Asc`) or
/// descending (`Desc`).
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum SortOrderType {
    Asc,
    Desc,
}

/// Displays the sort order using its derived `Debug` representation
/// (i.e. the variant name).
impl Display for SortOrderType {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(formatter, "{self:?}")
    }
}
14 changes: 14 additions & 0 deletions optd-cost-model/src/common/predicates/un_op_pred.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use std::fmt::Display;

/// The operator of a unary-operator predicate.
///
/// `Neg` has the explicit discriminant 1; `Not` follows with 2.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum UnOpType {
    Neg = 1,
    Not,
}

/// Displays the operator using its derived `Debug` representation
/// (i.e. the variant name).
impl Display for UnOpType {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(formatter, "{self:?}")
    }
}
51 changes: 51 additions & 0 deletions optd-cost-model/src/common/types.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
use std::fmt::Display;

/// Identifier of a group; a newtype over `usize`. Displayed as `!<id>`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct GroupId(pub usize);

/// Identifier of an expression; a newtype over `usize`. Displayed as the bare
/// number.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct ExprId(pub usize);

/// Identifier of a table; a newtype over `usize`. Displayed as `Table#<id>`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct TableId(pub usize);

/// Identifier of an attribute; a newtype over `usize`. Displayed as
/// `Attr#<id>`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct AttrId(pub usize);

/// Identifier of an epoch; a newtype over `usize`. Displayed as `Epoch#<id>`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct EpochId(pub usize);

impl Display for GroupId {
    /// Writes the id prefixed with `!`.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(id) = self;
        write!(formatter, "!{id}")
    }
}

impl Display for ExprId {
    /// Writes the id as a bare number.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(id) = self;
        write!(formatter, "{id}")
    }
}

impl Display for TableId {
    /// Writes the id prefixed with `Table#`.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(id) = self;
        write!(formatter, "Table#{id}")
    }
}

impl Display for AttrId {
    /// Writes the id prefixed with `Attr#`.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(id) = self;
        write!(formatter, "Attr#{id}")
    }
}

impl Display for EpochId {
    /// Writes the id prefixed with `Epoch#`.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(id) = self;
        write!(formatter, "Epoch#{id}")
    }
}
Loading

0 comments on commit 21d01ae

Please sign in to comment.