Skip to content

Commit

Permalink
Some simplifications to pruning and exprs (#1990)
Browse files Browse the repository at this point in the history
  • Loading branch information
joseph-isaacs authored Jan 17, 2025
1 parent ab5ea83 commit a8aa9a0
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 40 deletions.
2 changes: 1 addition & 1 deletion vortex-expr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ mod tests {
vec![Scalar::from(32_u32), Scalar::from("rufus".to_string())]
))
.to_string(),
"{dog:32_u32,cat:rufus}"
"{dog:32_u32,cat:\"rufus\"}"
);
}
}
29 changes: 5 additions & 24 deletions vortex-expr/src/pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,20 +316,7 @@ impl<'a> PruningPredicateRewriter<'a> {
let replaced_max = self.rewrite_other_exp(Stat::Max);
let replaced_min = self.rewrite_other_exp(Stat::Min);

let column_value_is_single_known_value = eq(min_col.clone(), max_col.clone());
let column_value = min_col;

let other_value_is_single_known_value =
eq(replaced_min.clone(), replaced_max.clone());
let other_value = replaced_min;

Some(and(
and(
column_value_is_single_known_value,
other_value_is_single_known_value,
),
eq(column_value, other_value),
))
Some(and(eq(min_col, replaced_max), eq(max_col, replaced_min)))
}
Operator::Gt | Operator::Gte => {
let max_col = get_item(self.add_stat_reference(Stat::Max), ident());
Expand Down Expand Up @@ -540,18 +527,12 @@ mod tests {
])
);
let expected_expr = and(
and(
eq(
get_item_scope(stat_field_name(&column, Stat::Min)),
get_item_scope(stat_field_name(&column, Stat::Max)),
),
eq(
get_item_scope(stat_field_name(&other_col, Stat::Min)),
get_item_scope(stat_field_name(&other_col, Stat::Max)),
),
),
eq(
get_item_scope(stat_field_name(&column, Stat::Min)),
get_item_scope(stat_field_name(&other_col, Stat::Max)),
),
eq(
get_item_scope(stat_field_name(&column, Stat::Max)),
get_item_scope(stat_field_name(&other_col, Stat::Min)),
),
);
Expand Down
9 changes: 2 additions & 7 deletions vortex-file/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ use vortex_array::ContextRef;
use vortex_buffer::Buffer;
use vortex_dtype::{DType, FieldPath};
use vortex_error::{vortex_err, VortexExpect, VortexResult};
use vortex_expr::transform::simplify_typed::simplify_typed;
use vortex_expr::{ident, ExprRef};
use vortex_layout::{ExprEvaluator, LayoutReader};
use vortex_scan::{RowMask, Scanner};
Expand Down Expand Up @@ -164,12 +163,8 @@ impl<I: IoDriver> VortexFile<I> {
where
R: Iterator<Item = RowMask> + Send + 'static,
{
let dt = self.dtype().clone();
let scanner = Arc::new(Scanner::new(
self.dtype().clone(),
simplify_typed(projection, dt.clone())?,
filter.map(|f| simplify_typed(f, dt)).transpose()?,
)?);
let scanner = Arc::new(Scanner::new(self.dtype().clone(), projection, filter)?);

let result_dtype = scanner.result_dtype().clone();

// Set up a segment channel to collect segment requests from the execution stream.
Expand Down
11 changes: 7 additions & 4 deletions vortex-scalar/src/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ impl Display for Scalar {
.value()
{
None => write!(f, "null"),
Some(bs) => write!(f, "{}", bs.as_str()),
Some(bs) => write!(f, "\"{}\"", bs.as_str()),
}
}
DType::Binary(_) => {
Expand All @@ -33,7 +33,7 @@ impl Display for Scalar {
Some(buf) => {
write!(
f,
"{}",
"\"{}\"",
buf.as_slice().iter().map(|b| format!("{b:x}")).format(",")
)
}
Expand Down Expand Up @@ -155,7 +155,10 @@ mod tests {

#[test]
fn display_utf8() {
assert_eq!(format!("{}", Scalar::from("Hello World!")), "Hello World!");
assert_eq!(
format!("{}", Scalar::from("Hello World!")),
"\"Hello World!\""
);
assert_eq!(format!("{}", Scalar::null(DType::Utf8(Nullable))), "null");
}

Expand All @@ -169,7 +172,7 @@ mod tests {
NonNullable
)
),
"48,65,6c,6c,6f,20,57,6f,72,6c,64,21"
"\"48,65,6c,6c,6f,20,57,6f,72,6c,64,21\""
);
assert_eq!(format!("{}", Scalar::null(DType::Binary(Nullable))), "null");
}
Expand Down
2 changes: 1 addition & 1 deletion vortex-scalar/src/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl Display for InnerScalarValue {
&bufstr.as_str()[bufstr.len() - 5..bufstr.len()],
)
} else {
write!(f, "{}", bufstr.as_str())
write!(f, "\"{}\"", bufstr.as_str())
}
}
Self::List(elems) => {
Expand Down
8 changes: 5 additions & 3 deletions vortex-scan/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use vortex_array::{ArrayDType, Canonical, IntoArrayData};
use vortex_dtype::DType;
use vortex_error::VortexResult;
use vortex_expr::forms::cnf::cnf;
use vortex_expr::transform::simplify_typed::simplify_typed;
use vortex_expr::{lit, or, ExprRef};

/// Represents a scan operation to read data from a set of rows, with an optional filter expression,
Expand All @@ -24,8 +25,6 @@ use vortex_expr::{lit, or, ExprRef};
/// the second filter over the reduced set of rows.
#[derive(Debug, Clone)]
pub struct Scanner {
#[allow(dead_code)]
dtype: DType,
projection: ExprRef,
rev_filter: Box<[ExprRef]>,
projection_dtype: DType,
Expand All @@ -38,13 +37,17 @@ pub struct Scanner {
impl Scanner {
/// Create a new scan with the given projection and optional filter.
pub fn new(dtype: DType, projection: ExprRef, filter: Option<ExprRef>) -> VortexResult<Self> {
let projection = simplify_typed(projection, dtype.clone())?;

// TODO(ngates): compute and cache a FieldMask based on the referenced fields.
// Where FieldMask ~= Vec<FieldPath>
let result_dtype = projection
.evaluate(&Canonical::empty(&dtype)?.into_array())?
.dtype()
.clone();

let filter = filter.map(|f| simplify_typed(f, dtype)).transpose()?;

let conjuncts: Box<[ExprRef]> = if let Some(filter) = filter {
let conjuncts = cnf(filter)?;
conjuncts
Expand All @@ -63,7 +66,6 @@ impl Scanner {
};

Ok(Self {
dtype,
projection,
rev_filter: conjuncts,
projection_dtype: result_dtype,
Expand Down

0 comments on commit a8aa9a0

Please sign in to comment.