diff --git a/vortex-expr/src/lib.rs b/vortex-expr/src/lib.rs index 1ace3b32ca..151a5175c8 100644 --- a/vortex-expr/src/lib.rs +++ b/vortex-expr/src/lib.rs @@ -282,7 +282,7 @@ mod tests { vec![Scalar::from(32_u32), Scalar::from("rufus".to_string())] )) .to_string(), - "{dog:32_u32,cat:rufus}" + "{dog:32_u32,cat:\"rufus\"}" ); } } diff --git a/vortex-expr/src/pruning.rs b/vortex-expr/src/pruning.rs index a0e6204a1a..1d0127eb8c 100644 --- a/vortex-expr/src/pruning.rs +++ b/vortex-expr/src/pruning.rs @@ -316,20 +316,7 @@ impl<'a> PruningPredicateRewriter<'a> { let replaced_max = self.rewrite_other_exp(Stat::Max); let replaced_min = self.rewrite_other_exp(Stat::Min); - let column_value_is_single_known_value = eq(min_col.clone(), max_col.clone()); - let column_value = min_col; - - let other_value_is_single_known_value = - eq(replaced_min.clone(), replaced_max.clone()); - let other_value = replaced_min; - - Some(and( - and( - column_value_is_single_known_value, - other_value_is_single_known_value, - ), - eq(column_value, other_value), - )) + Some(and(eq(min_col, replaced_max), eq(max_col, replaced_min))) } Operator::Gt | Operator::Gte => { let max_col = get_item(self.add_stat_reference(Stat::Max), ident()); @@ -540,18 +527,12 @@ mod tests { ]) ); let expected_expr = and( - and( - eq( - get_item_scope(stat_field_name(&column, Stat::Min)), - get_item_scope(stat_field_name(&column, Stat::Max)), - ), - eq( - get_item_scope(stat_field_name(&other_col, Stat::Min)), - get_item_scope(stat_field_name(&other_col, Stat::Max)), - ), - ), eq( get_item_scope(stat_field_name(&column, Stat::Min)), + get_item_scope(stat_field_name(&other_col, Stat::Max)), + ), + eq( + get_item_scope(stat_field_name(&column, Stat::Max)), get_item_scope(stat_field_name(&other_col, Stat::Min)), ), ); diff --git a/vortex-file/src/file.rs b/vortex-file/src/file.rs index 14197b76f1..521e92d906 100644 --- a/vortex-file/src/file.rs +++ b/vortex-file/src/file.rs @@ -14,7 +14,6 @@ use vortex_array::ContextRef; use vortex_buffer::Buffer; use vortex_dtype::{DType, FieldPath}; use vortex_error::{vortex_err, VortexExpect, VortexResult}; -use vortex_expr::transform::simplify_typed::simplify_typed; use vortex_expr::{ident, ExprRef}; use vortex_layout::{ExprEvaluator, LayoutReader}; use vortex_scan::{RowMask, Scanner}; @@ -164,12 +163,8 @@ impl VortexFile { where R: Iterator + Send + 'static, { - let dt = self.dtype().clone(); - let scanner = Arc::new(Scanner::new( - self.dtype().clone(), - simplify_typed(projection, dt.clone())?, - filter.map(|f| simplify_typed(f, dt)).transpose()?, - )?); + let scanner = Arc::new(Scanner::new(self.dtype().clone(), projection, filter)?); + let result_dtype = scanner.result_dtype().clone(); // Set up a segment channel to collect segment requests from the execution stream. diff --git a/vortex-scalar/src/display.rs b/vortex-scalar/src/display.rs index 049bf2bc39..d177c09e67 100644 --- a/vortex-scalar/src/display.rs +++ b/vortex-scalar/src/display.rs @@ -21,7 +21,7 @@ impl Display for Scalar { .value() { None => write!(f, "null"), - Some(bs) => write!(f, "{}", bs.as_str()), + Some(bs) => write!(f, "\"{}\"", bs.as_str()), } } DType::Binary(_) => { @@ -33,7 +33,7 @@ impl Display for Scalar { Some(buf) => { write!( f, - "{}", + "\"{}\"", buf.as_slice().iter().map(|b| format!("{b:x}")).format(",") ) } @@ -155,7 +155,10 @@ mod tests { #[test] fn display_utf8() { - assert_eq!(format!("{}", Scalar::from("Hello World!")), "Hello World!"); + assert_eq!( + format!("{}", Scalar::from("Hello World!")), + "\"Hello World!\"" + ); assert_eq!(format!("{}", Scalar::null(DType::Utf8(Nullable))), "null"); } @@ -169,7 +172,7 @@ mod tests { NonNullable ) ), - "48,65,6c,6c,6f,20,57,6f,72,6c,64,21" + "\"48,65,6c,6c,6f,20,57,6f,72,6c,64,21\"" ); assert_eq!(format!("{}", Scalar::null(DType::Binary(Nullable))), "null"); } diff --git a/vortex-scalar/src/value.rs b/vortex-scalar/src/value.rs index 57deabbcc7..92925fca17 100644 --- a/vortex-scalar/src/value.rs +++ b/vortex-scalar/src/value.rs @@ -69,7 +69,7 @@ impl Display for InnerScalarValue { &bufstr.as_str()[bufstr.len() - 5..bufstr.len()], ) } else { - write!(f, "{}", bufstr.as_str()) + write!(f, "\"{}\"", bufstr.as_str()) } } Self::List(elems) => { diff --git a/vortex-scan/src/lib.rs b/vortex-scan/src/lib.rs index e416daf974..1561f819df 100644 --- a/vortex-scan/src/lib.rs +++ b/vortex-scan/src/lib.rs @@ -9,6 +9,7 @@ use vortex_array::{ArrayDType, Canonical, IntoArrayData}; use vortex_dtype::DType; use vortex_error::VortexResult; use vortex_expr::forms::cnf::cnf; +use vortex_expr::transform::simplify_typed::simplify_typed; use vortex_expr::{lit, or, ExprRef}; /// Represents a scan operation to read data from a set of rows, with an optional filter expression, @@ -24,8 +25,6 @@ use vortex_expr::{lit, or, ExprRef}; /// the second filter over the reduced set of rows. #[derive(Debug, Clone)] pub struct Scanner { - #[allow(dead_code)] - dtype: DType, projection: ExprRef, rev_filter: Box<[ExprRef]>, projection_dtype: DType, @@ -38,6 +37,8 @@ pub struct Scanner { impl Scanner { /// Create a new scan with the given projection and optional filter. pub fn new(dtype: DType, projection: ExprRef, filter: Option) -> VortexResult { + let projection = simplify_typed(projection, dtype.clone())?; + // TODO(ngates): compute and cache a FieldMask based on the referenced fields. // Where FieldMask ~= Vec let result_dtype = projection @@ -45,6 +46,8 @@ impl Scanner { .dtype() .clone(); + let filter = filter.map(|f| simplify_typed(f, dtype)).transpose()?; + let conjuncts: Box<[ExprRef]> = if let Some(filter) = filter { let conjuncts = cnf(filter)?; conjuncts @@ -63,7 +66,6 @@ impl Scanner { }; Ok(Self { - dtype, projection, rev_filter: conjuncts, projection_dtype: result_dtype,