From e94bae01fd3066bb8f47588fd75fa32f99170c47 Mon Sep 17 00:00:00 2001 From: joe Date: Fri, 6 Sep 2024 14:16:15 +0800 Subject: [PATCH] Refactor codebase to optimize execution of logical plans and improve aggregation functionality --- qurious/src/execution/session.rs | 2 +- qurious/src/logical/expr/binary.rs | 8 ++++++++ sqlparser/src/datatype.rs | 4 ++++ sqlparser/src/error.rs | 5 +++++ sqlparser/src/lexer.rs | 26 +++++++++++++++++--------- sqlparser/src/parser.rs | 11 +++++++++++ sqlparser/src/token.rs | 1 + 7 files changed, 47 insertions(+), 10 deletions(-) diff --git a/qurious/src/execution/session.rs b/qurious/src/execution/session.rs index bf749d5..2d57415 100644 --- a/qurious/src/execution/session.rs +++ b/qurious/src/execution/session.rs @@ -65,7 +65,6 @@ impl ExecuteSession { pub fn execute_logical_plan(&self, plan: &LogicalPlan) -> Result> { // let plan = self.optimizer.optimize(plan)?; - match &plan { LogicalPlan::Ddl(ddl) => self.execute_ddl(ddl), LogicalPlan::Dml(DmlStatement { @@ -228,6 +227,7 @@ mod tests { let batch = session.sql("select 1+0.1")?; + print_batches(&batch)?; Ok(()) diff --git a/qurious/src/logical/expr/binary.rs b/qurious/src/logical/expr/binary.rs index 04d2a4f..43f38c8 100644 --- a/qurious/src/logical/expr/binary.rs +++ b/qurious/src/logical/expr/binary.rs @@ -58,6 +58,14 @@ impl BinaryExpr { self.left, Box::new(LogicalExpr::Cast(CastExpr::new(*self.right, DataType::LargeUtf8))), ), + (DataType::Float64, _) => ( + Box::new(LogicalExpr::Cast(CastExpr::new(*self.left, DataType::Float64))), + self.right, + ), + (_, DataType::Float64) => ( + self.left, + Box::new(LogicalExpr::Cast(CastExpr::new(*self.right, DataType::Float64))), + ), _ => (self.left, self.right), }; diff --git a/sqlparser/src/datatype.rs b/sqlparser/src/datatype.rs index d0efb3a..892f6bd 100644 --- a/sqlparser/src/datatype.rs +++ b/sqlparser/src/datatype.rs @@ -19,3 +19,7 @@ impl Display for DataType { } } } + +pub struct Number{ + +} diff --git a/sqlparser/src/error.rs b/sqlparser/src/error.rs index f59bdea..9b3dfd8 100644 --- a/sqlparser/src/error.rs +++ b/sqlparser/src/error.rs @@ -4,6 +4,7 @@ macro_rules! generate_error_enum { pub enum Error { $($variant(crate::token::Token)),*, ParseIntError(std::num::ParseIntError,crate::token::Token), + ParseFloatError(std::num::ParseFloatError,crate::token::Token), DuplicateColumn(String), UnKnownInfixOperator(String), ParserError(String), @@ -18,6 +19,9 @@ macro_rules! generate_error_enum { Error::ParseIntError(e, token) => { write!(f, "error: {} line: {} column: {}", e, token.location.line, token.location.column) } + Error::ParseFloatError(e, token) => { + write!(f, "error: {} line: {} column: {}", e, token.location.line, token.location.column) + } Error::DuplicateColumn(column) => { write!(f, "error: duplicate column: {}", column) } @@ -45,6 +49,7 @@ impl std::error::Error for Error { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { Error::ParseIntError(e, _) => Some(e), + Error::ParseFloatError(e, _) => Some(e), _ => None, } } diff --git a/sqlparser/src/lexer.rs b/sqlparser/src/lexer.rs index cd67695..f27a963 100644 --- a/sqlparser/src/lexer.rs +++ b/sqlparser/src/lexer.rs @@ -92,9 +92,7 @@ impl<'a> Lexer<'a> { '\'' => { break; } - EMPTY_CHAR => { - return Token::new(TokenType::ILLIGAL, literal, self.location()) - } + EMPTY_CHAR => return Token::new(TokenType::ILLIGAL, literal, self.location()), _ => { s.push(char::from(self.cur_ch)); } @@ -108,7 +106,11 @@ impl<'a> Lexer<'a> { return Token::new(token_type, literal, self.location()); } b if b.is_ascii_digit() => { - return Token::new(TokenType::Int, self.read_number(), self.location()); + let number = self.read_number(); + if number.contains('.') { + return Token::new(TokenType::Float, number, self.location()); + } + return Token::new(TokenType::Int, number, self.location()); } _ => Token::new(TokenType::ILLIGAL, literal, self.location()), }; @@ -142,10 +144,7 @@ impl<'a> Lexer<'a> { fn read_literal(&mut self) -> String { let mut literal = String::new(); - while self.cur_ch.is_ascii_alphabetic() - || self.cur_ch.is_ascii_alphanumeric() - || self.cur_ch == '_' - { + while self.cur_ch.is_ascii_alphabetic() || self.cur_ch.is_ascii_alphanumeric() || self.cur_ch == '_' { literal.push(self.cur_ch); self.read_char(); } @@ -155,7 +154,7 @@ impl<'a> Lexer<'a> { fn read_number(&mut self) -> String { let mut number = String::new(); - while self.cur_ch.is_ascii_digit() { + while self.cur_ch.is_ascii_digit() || self.cur_ch == '.' { number.push(self.cur_ch); self.read_char(); } @@ -178,6 +177,15 @@ mod tests { use super::*; use crate::token::{Keyword, TokenType}; + #[test] + fn test_float() { + let input = "1.23"; + let mut l = Lexer::new(input); + let tok = l.next(); + assert_eq!(tok.token_type, TokenType::Float); + assert_eq!(tok.literal, "1.23"); + } + #[test] fn test_single_char_token() { let input = "=-+(){},;*/<>!?"; diff --git a/sqlparser/src/parser.rs b/sqlparser/src/parser.rs index 6234ab4..dd05cda 100644 --- a/sqlparser/src/parser.rs +++ b/sqlparser/src/parser.rs @@ -766,6 +766,10 @@ impl<'a> Parser<'a> { } } TokenType::Asterisk => Ok(ast::Expression::Identifier("*".into())), + TokenType::Float=> literal + .parse() + .map(|f| ast::Expression::Literal(ast::Literal::Float(f))) + .map_err(|e| Error::ParseFloatError(e, token)), TokenType::Int => literal .parse() .map(|i| ast::Expression::Literal(ast::Literal::Int(i))) @@ -3063,6 +3067,13 @@ mod tests { ); } + #[test] + fn test_parse_float() { + let stmt = parse_expr("1.0").unwrap(); + + assert_eq!(stmt, Expression::Literal(ast::Literal::Float(1.0))); + } + #[test] fn test_parse_integer() { let stmt = parse_expr("123").unwrap(); diff --git a/sqlparser/src/token.rs b/sqlparser/src/token.rs index d8dd3f8..4f0a048 100644 --- a/sqlparser/src/token.rs +++ b/sqlparser/src/token.rs @@ -71,6 +71,7 @@ pub enum TokenType { Ident, String, Int, + Float, // Operators Assign,