From a34ad47466dd7673ee576369b024efda4f52ad2f Mon Sep 17 00:00:00 2001 From: Gabor Gevay Date: Mon, 2 Dec 2024 17:50:53 +0100 Subject: [PATCH 1/3] Tidy `ExplainConfig` by collecting analyses into one group --- src/repr/src/explain.rs | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/repr/src/explain.rs b/src/repr/src/explain.rs index c209cca4bec5a..52366753340ac 100644 --- a/src/repr/src/explain.rs +++ b/src/repr/src/explain.rs @@ -154,22 +154,30 @@ impl From for ExplainError { /// A set of options for controlling the output of [`Explain`] implementations. #[derive(Clone, Debug)] pub struct ExplainConfig { - /// Show the number of columns. + // Analyses: + // (These are shown only if the Analysis is supported by the backing IR.) + /// Show the `SubtreeSize` Analysis in the explanation. + pub subtree_size: bool, + /// Show the number of columns, i.e., the `Arity` Analysis. pub arity: bool, - /// Show cardinality information. + /// Show the types, i.e., the `RelationType` Analysis. + pub types: bool, + /// Show the sets of unique keys, i.e., the `UniqueKeys` Analysis. + pub keys: bool, + /// Show the `NonNegative` Analysis. + pub non_negative: bool, + /// Show the `Cardinality` Analysis. pub cardinality: bool, - /// Show inferred column names. + /// Show the `ColumnNames` Analysis. pub column_names: bool, + + // Other display options: /// Render implemented MIR `Join` nodes in a way which reflects the implementation. pub join_impls: bool, /// Use inferred column names when rendering scalar and aggregate expressions. pub humanized_exprs: bool, - /// Show the sets of unique keys. - pub keys: bool, /// Restrict output trees to linear chains. Ignored if `raw_plans` is set. pub linear_chains: bool, - /// Show the `non_negative` in the explanation if it is supported by the backing IR. - pub non_negative: bool, /// Show the slow path plan even if a fast path plan was created. Useful for debugging. /// Enforced if `timing` is set. pub no_fast_path: bool, @@ -183,14 +191,11 @@ pub struct ExplainConfig { pub raw_syntax: bool, /// Anonymize literals in the plan. pub redacted: bool, - /// Show the `subtree_size` attribute in the explanation if it is supported by the backing IR. - pub subtree_size: bool, /// Print optimization timings. pub timing: bool, - /// Show the `type` attribute in the explanation. - pub types: bool, /// Show MFP pushdown information. pub filter_pushdown: bool, + /// Optimizer feature flags. pub features: OptimizerFeatureOverrides, } From 9270d5580774e521f47f971ae4717a88756ee6c8 Mon Sep 17 00:00:00 2001 From: Gabor Gevay Date: Mon, 2 Dec 2024 18:23:15 +0100 Subject: [PATCH 2/3] Update mentions of `Attribute` to mention `Analysis` The Attribute framework evolved into the Analysis framework. However, the EXPLAIN code still had a bunch of things whose names referred to Attributes. This commit just renames these things and fixes some comments. --- src/adapter/src/explain.rs | 2 +- src/adapter/src/explain/mir.rs | 4 +-- src/expr-parser/src/parser.rs | 44 +++++++++++------------ src/expr/src/explain/text.rs | 64 ++++++++++++++++++---------------- src/repr/src/explain.rs | 46 ++++++++++++------------ src/transform/src/analysis.rs | 48 ++++++++++++------------- 6 files changed, 106 insertions(+), 102 deletions(-) diff --git a/src/adapter/src/explain.rs b/src/adapter/src/explain.rs index a9237ce5f2338..0f19a59eafa2f 100644 --- a/src/adapter/src/explain.rs +++ b/src/adapter/src/explain.rs @@ -90,7 +90,7 @@ where /// In the long term, this method and [`explain_dataflow`] should be unified. In /// order to do that, however, we first need to generalize the role /// [`DataflowMetainfo`] as a carrier of metainformation for the optimization -/// pass in general, and not for a specific strucutre representing an +/// pass in general, and not for a specific structure representing an /// intermediate result. pub(crate) fn explain_plan( mut plan: T, diff --git a/src/adapter/src/explain/mir.rs b/src/adapter/src/explain/mir.rs index 5598c58472618..2c62dd54e2538 100644 --- a/src/adapter/src/explain/mir.rs +++ b/src/adapter/src/explain/mir.rs @@ -12,8 +12,8 @@ //! The specialized [`Explain`] implementation for an [`MirRelationExpr`] //! wrapped in an [`Explainable`] newtype struct allows us to interpret more //! [`mz_repr::explain::ExplainConfig`] options. This is the case because -//! attribute derivation and let normalization are defined in [`mz_transform`] -//! and conssequently are not available for the default [`Explain`] +//! Analysis derivation and Let normalization are defined in [`mz_transform`] +//! and consequently are not available for the default [`Explain`] //! implementation for [`MirRelationExpr`] in [`mz_expr`]. use mz_compute_types::dataflows::DataflowDescription; diff --git a/src/expr-parser/src/parser.rs b/src/expr-parser/src/parser.rs index 7400a0a618ced..e7d63ab8b3cb2 100644 --- a/src/expr-parser/src/parser.rs +++ b/src/expr-parser/src/parser.rs @@ -120,12 +120,12 @@ mod relation { let constant = input.parse::()?; let parse_typ = |input: ParseStream| -> syn::Result { - let attrs = attributes::parse_attributes(input)?; - let Some(column_types) = attrs.types else { - let msg = "Missing expected `types` attribute for Constant line"; + let analyses = analyses::parse_analyses(input)?; + let Some(column_types) = analyses.types else { + let msg = "Missing expected `types` analyses for Constant line"; Err(Error::new(input.span(), msg))? }; - let keys = attrs.keys.unwrap_or_default(); + let keys = analyses.keys.unwrap_or_default(); Ok(RelationType { column_types, keys }) }; @@ -200,13 +200,13 @@ mod relation { if recursive { let (mut ids, mut values, mut limits) = (vec![], vec![], vec![]); - for (id, attrs, value) in ctes.into_iter().rev() { + for (id, analyses, value) in ctes.into_iter().rev() { let typ = { - let Some(column_types) = attrs.types else { - let msg = format!("`let {}` needs a `types` attribute", id); + let Some(column_types) = analyses.types else { + let msg = format!("`let {}` needs a `types` analyses", id); Err(Error::new(with.span(), msg))? }; - let keys = attrs.keys.unwrap_or_default(); + let keys = analyses.keys.unwrap_or_default(); RelationType { column_types, keys } }; @@ -251,7 +251,7 @@ mod relation { fn parse_cte( ctx: CtxRef, input: ParseStream, - ) -> syn::Result<(LocalId, attributes::Attributes, MirRelationExpr)> { + ) -> syn::Result<(LocalId, analyses::Analyses, MirRelationExpr)> { let cte = input.parse::()?; let ident = input.parse::()?; @@ -259,12 +259,12 @@ mod relation { input.parse::()?; - let attrs = attributes::parse_attributes(input)?; + let analyses = analyses::parse_analyses(input)?; let parse_value = ParseChildren::new(input, cte.span().start()); let value = parse_value.parse_one(ctx, parse_expr)?; - Ok((id, attrs, value)) + Ok((id, analyses, value)) } fn parse_project(ctx: CtxRef, input: ParseStream) -> Result { @@ -953,7 +953,7 @@ mod scalar { let typ = if input.eat(kw::null) { packer.push(Datum::Null); input.parse::()?; - attributes::parse_scalar_type(input)?.nullable(true) + analyses::parse_scalar_type(input)?.nullable(true) } else { match input.parse::()? { syn::Lit::Str(l) => { @@ -1237,21 +1237,21 @@ mod row { } } -mod attributes { +mod analyses { use mz_repr::{ColumnType, ScalarType}; use super::*; #[derive(Default)] - pub struct Attributes { + pub struct Analyses { pub types: Option>, pub keys: Option>>, } - pub fn parse_attributes(input: ParseStream) -> syn::Result { - let mut attributes = Attributes::default(); + pub fn parse_analyses(input: ParseStream) -> syn::Result { + let mut analyses = Analyses::default(); - // Attributes are optional, appearing after a `//` at the end of the + // Analyses are optional, appearing after a `//` at the end of the // line. However, since the syn lexer eats comments, we assume that `//` // was replaced with `::` upfront. if input.eat(syn::Token![::]) { @@ -1260,7 +1260,7 @@ mod attributes { let (start, end) = (inner.span().start(), inner.span().end()); if start.line != end.line { - let msg = "attributes should not span more than one line".to_string(); + let msg = "analyses should not span more than one line".to_string(); Err(Error::new(inner.span(), msg))? } @@ -1270,17 +1270,17 @@ mod attributes { "types" => { inner.parse::()?; let value = inner.parse::()?.value(); - attributes.types = Some(parse_types.parse_str(&value)?); + analyses.types = Some(parse_types.parse_str(&value)?); } // TODO: support keys key => { - let msg = format!("unexpected attribute type `{}`", key); + let msg = format!("unexpected analysis type `{}`", key); Err(Error::new(inner.span(), msg))?; } } } } - Ok(attributes) + Ok(analyses) } fn parse_types(input: ParseStream) -> syn::Result> { @@ -1394,7 +1394,7 @@ mod def { input.parse::()?; let column_name = input.parse::()?.to_string(); input.parse::()?; - let column_type = attributes::parse_column_type(input)?; + let column_type = analyses::parse_column_type(input)?; Ok((column_name, column_type)) } diff --git a/src/expr/src/explain/text.rs b/src/expr/src/explain/text.rs index cd50ca085ba71..bb4c712a903ea 100644 --- a/src/expr/src/explain/text.rs +++ b/src/expr/src/explain/text.rs @@ -18,7 +18,7 @@ use mz_ore::soft_assert_eq_or_log; use mz_ore::str::{closure_to_display, separated, Indent, IndentLike, StrExt}; use mz_repr::explain::text::DisplayText; use mz_repr::explain::{ - CompactScalars, ExprHumanizer, HumanizedAttributes, IndexUsageType, Indices, + CompactScalars, ExprHumanizer, HumanizedAnalyses, IndexUsageType, Indices, PlanRenderingContext, RenderingContext, ScalarOps, }; use mz_repr::{Datum, Diff, GlobalId, Row}; @@ -46,8 +46,10 @@ where if let Some(finishing) = &self.context.finishing { if ctx.config.humanized_exprs { - let attrs = ctx.annotations.get(&self.plan.plan); - let cols = attrs.map(|attrs| attrs.column_names.clone()).flatten(); + let analyses = ctx.annotations.get(&self.plan.plan); + let cols = analyses + .map(|analyses| analyses.column_names.clone()) + .flatten(); mode.expr(finishing, cols.as_ref()).fmt_text(f, &mut ctx)?; } else { mode.expr(finishing, None).fmt_text(f, &mut ctx)?; @@ -112,8 +114,10 @@ where // If present, a RowSetFinishing always applies to the first rendered plan. Some(finishing) if no == 0 => { if ctx.config.humanized_exprs { - let attrs = ctx.annotations.get(plan.plan); - let cols = attrs.map(|attrs| attrs.column_names.clone()).flatten(); + let analyses = ctx.annotations.get(plan.plan); + let cols = analyses + .map(|analyses| analyses.column_names.clone()) + .flatten(); mode.expr(finishing, cols.as_ref()).fmt_text(f, ctx)?; } else { mode.expr(finishing, None).fmt_text(f, ctx)?; @@ -295,7 +299,7 @@ impl MirRelationExpr { Ok(rows) => { if !rows.is_empty() { write!(f, "{}Constant", ctx.indent)?; - self.fmt_attributes(f, ctx)?; + self.fmt_analyses(f, ctx)?; ctx.indented(|ctx| { fmt_text_constant_rows( f, @@ -306,7 +310,7 @@ impl MirRelationExpr { })?; } else { write!(f, "{}Constant ", ctx.indent)?; - self.fmt_attributes(f, ctx)?; + self.fmt_analyses(f, ctx)?; } } Err(err) => { @@ -338,11 +342,11 @@ impl MirRelationExpr { Ok(()) })?; write!(f, "{}Return", ctx.indent)?; - self.fmt_attributes(f, ctx)?; + self.fmt_analyses(f, ctx)?; ctx.indented(|ctx| head.fmt_text(f, ctx))?; } else { write!(f, "{}Return", ctx.indent)?; - self.fmt_attributes(f, ctx)?; + self.fmt_analyses(f, ctx)?; ctx.indented(|ctx| head.fmt_text(f, ctx))?; writeln!(f, "{}With", ctx.indent)?; ctx.indented(|ctx| { @@ -379,7 +383,7 @@ impl MirRelationExpr { unreachable!(); // We exclude this case in `as_explain_single_plan`. } else { write!(f, "{}Return", ctx.indent)?; - self.fmt_attributes(f, ctx)?; + self.fmt_analyses(f, ctx)?; ctx.indented(|ctx| head.fmt_text(f, ctx))?; write!(f, "{}With Mutually Recursive", ctx.indent)?; if let Some(limit) = all_limits_same { @@ -475,7 +479,7 @@ impl MirRelationExpr { } } } - self.fmt_attributes(f, ctx)?; + self.fmt_analyses(f, ctx)?; } Project { outputs, input } => { FmtNode { @@ -483,7 +487,7 @@ impl MirRelationExpr { let outputs = mode.seq(outputs, self.column_names(ctx)); let outputs = CompactScalars(outputs); write!(f, "{}Project ({})", ctx.indent, outputs)?; - self.fmt_attributes(f, ctx) + self.fmt_analyses(f, ctx) }, fmt_children: |f, ctx| input.fmt_text(f, ctx), } @@ -495,7 +499,7 @@ impl MirRelationExpr { let scalars = mode.seq(scalars, self.column_names(ctx)); let scalars = CompactScalars(scalars); write!(f, "{}Map ({})", ctx.indent, scalars)?; - self.fmt_attributes(f, ctx) + self.fmt_analyses(f, ctx) }, fmt_children: |f, ctx| input.fmt_text(f, ctx), } @@ -507,7 +511,7 @@ impl MirRelationExpr { let exprs = mode.seq(exprs, input.column_names(ctx)); let exprs = CompactScalars(exprs); write!(f, "{}FlatMap {}({})", ctx.indent, func, exprs)?; - self.fmt_attributes(f, ctx) + self.fmt_analyses(f, ctx) }, fmt_children: |f, ctx| input.fmt_text(f, ctx), } @@ -524,7 +528,7 @@ impl MirRelationExpr { let predicates = separated(" AND ", predicates); write!(f, "{}Filter {}", ctx.indent, predicates)?; } - self.fmt_attributes(f, ctx) + self.fmt_analyses(f, ctx) }, fmt_children: |f, ctx| input.fmt_text(f, ctx), } @@ -557,7 +561,7 @@ impl MirRelationExpr { write!(f, " type={}", name)?; } - self.fmt_attributes(f, ctx)?; + self.fmt_analyses(f, ctx)?; if ctx.config.join_impls { let input_name = &|pos: usize| -> String { @@ -712,7 +716,7 @@ impl MirRelationExpr { Some(literal_constraints.clone()), cse_id, )?; - self.fmt_attributes(f, ctx)?; + self.fmt_analyses(f, ctx)?; } Reduce { group_key, @@ -749,7 +753,7 @@ impl MirRelationExpr { if let Some(expected_group_size) = expected_group_size { write!(f, " exp_group_size={}", expected_group_size)?; } - self.fmt_attributes(f, ctx) + self.fmt_analyses(f, ctx) }, fmt_children: |f, ctx| input.fmt_text(f, ctx), } @@ -794,7 +798,7 @@ impl MirRelationExpr { if let Some(expected_group_size) = expected_group_size { write!(f, " exp_group_size={}", expected_group_size)?; } - self.fmt_attributes(f, ctx) + self.fmt_analyses(f, ctx) }, fmt_children: |f, ctx| input.fmt_text(f, ctx), } @@ -804,7 +808,7 @@ impl MirRelationExpr { FmtNode { fmt_root: |f, ctx| { write!(f, "{}Negate", ctx.indent)?; - self.fmt_attributes(f, ctx) + self.fmt_analyses(f, ctx) }, fmt_children: |f, ctx| input.fmt_text(f, ctx), } @@ -814,7 +818,7 @@ impl MirRelationExpr { FmtNode { fmt_root: |f, ctx| { write!(f, "{}Threshold", ctx.indent)?; - self.fmt_attributes(f, ctx) + self.fmt_analyses(f, ctx) }, fmt_children: |f, ctx| input.fmt_text(f, ctx), } @@ -822,7 +826,7 @@ impl MirRelationExpr { } Union { base, inputs } => { write!(f, "{}Union", ctx.indent)?; - self.fmt_attributes(f, ctx)?; + self.fmt_analyses(f, ctx)?; ctx.indented(|ctx| { base.fmt_text(f, ctx)?; for input in inputs.iter() { @@ -843,7 +847,7 @@ impl MirRelationExpr { let keys = separated("], [", keys); write!(f, " keys=[[{}]]", keys)?; - self.fmt_attributes(f, ctx) + self.fmt_analyses(f, ctx) }, fmt_children: |f, ctx| input.fmt_text(f, ctx), } @@ -854,16 +858,16 @@ impl MirRelationExpr { Ok(()) } - fn fmt_attributes( + fn fmt_analyses( &self, f: &mut fmt::Formatter<'_>, ctx: &PlanRenderingContext<'_, MirRelationExpr>, ) -> fmt::Result { - if ctx.config.requires_attributes() { - if let Some(attrs) = ctx.annotations.get(self) { - writeln!(f, " {}", HumanizedAttributes::new(attrs, ctx)) + if ctx.config.requires_analyses() { + if let Some(analyses) = ctx.annotations.get(self) { + writeln!(f, " {}", HumanizedAnalyses::new(analyses, ctx)) } else { - writeln!(f, " // error: no attrs for subtree in map") + writeln!(f, " // error: no analyses for subtree in map") } } else { writeln!(f) @@ -876,8 +880,8 @@ impl MirRelationExpr { ) -> Option<&Vec> { if !ctx.config.humanized_exprs { None - } else if let Some(attrs) = ctx.annotations.get(self) { - attrs.column_names.as_ref() + } else if let Some(analyses) = ctx.annotations.get(self) { + analyses.column_names.as_ref() } else { None } diff --git a/src/repr/src/explain.rs b/src/repr/src/explain.rs index 52366753340ac..9fa10bbd4eca5 100644 --- a/src/repr/src/explain.rs +++ b/src/repr/src/explain.rs @@ -228,7 +228,7 @@ impl Default for ExplainConfig { } impl ExplainConfig { - pub fn requires_attributes(&self) -> bool { + pub fn requires_analyses(&self) -> bool { self.subtree_size || self.non_negative || self.arity @@ -383,7 +383,7 @@ impl<'a> AsRef<&'a dyn ExprHumanizer> for RenderingContext<'a> { pub struct PlanRenderingContext<'a, T> { pub indent: Indent, pub humanizer: &'a dyn ExprHumanizer, - pub annotations: BTreeMap<&'a T, Attributes>, + pub annotations: BTreeMap<&'a T, Analyses>, pub config: &'a ExplainConfig, } @@ -391,7 +391,7 @@ impl<'a, T> PlanRenderingContext<'a, T> { pub fn new( indent: Indent, humanizer: &'a dyn ExprHumanizer, - annotations: BTreeMap<&'a T, Attributes>, + annotations: BTreeMap<&'a T, Analyses>, config: &'a ExplainConfig, ) -> PlanRenderingContext<'a, T> { PlanRenderingContext { @@ -612,16 +612,16 @@ pub trait ScalarOps { } /// A somewhat ad-hoc way to keep carry a plan with a set -/// of attributes derived for each node in that plan. +/// of analyses derived for each node in that plan. #[allow(missing_debug_implementations)] pub struct AnnotatedPlan<'a, T> { pub plan: &'a T, - pub annotations: BTreeMap<&'a T, Attributes>, + pub annotations: BTreeMap<&'a T, Analyses>, } -/// A container for derived attributes. +/// A container for derived analyses. #[derive(Clone, Default, Debug)] -pub struct Attributes { +pub struct Analyses { pub non_negative: Option, pub subtree_size: Option, pub arity: Option, @@ -632,47 +632,47 @@ pub struct Attributes { } #[derive(Debug, Clone)] -pub struct HumanizedAttributes<'a> { - attrs: &'a Attributes, +pub struct HumanizedAnalyses<'a> { + analyses: &'a Analyses, humanizer: &'a dyn ExprHumanizer, config: &'a ExplainConfig, } -impl<'a> HumanizedAttributes<'a> { - pub fn new(attrs: &'a Attributes, ctx: &PlanRenderingContext<'a, T>) -> Self { +impl<'a> HumanizedAnalyses<'a> { + pub fn new(analyses: &'a Analyses, ctx: &PlanRenderingContext<'a, T>) -> Self { Self { - attrs, + analyses, humanizer: ctx.humanizer, config: ctx.config, } } } -impl<'a> fmt::Display for HumanizedAttributes<'a> { - // Attribute rendering is guarded by the ExplainConfig flag for each - // attribute. This is needed because we might have derived attributes that +impl<'a> Display for HumanizedAnalyses<'a> { + // Analysis rendering is guarded by the ExplainConfig flag for each + // Analysis. This is needed because we might have derived Analysis that // are not explicitly requested (such as column_names), in which case we // don't want to display them. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { let mut builder = f.debug_struct("//"); if self.config.subtree_size { - let subtree_size = self.attrs.subtree_size.expect("subtree_size"); + let subtree_size = self.analyses.subtree_size.expect("subtree_size"); builder.field("subtree_size", &subtree_size); } if self.config.non_negative { - let non_negative = self.attrs.non_negative.expect("non_negative"); + let non_negative = self.analyses.non_negative.expect("non_negative"); builder.field("non_negative", &non_negative); } if self.config.arity { - let arity = self.attrs.arity.expect("arity"); + let arity = self.analyses.arity.expect("arity"); builder.field("arity", &arity); } if self.config.types { - let types = match self.attrs.types.as_ref().expect("types") { + let types = match self.analyses.types.as_ref().expect("types") { Some(types) => { let types = types .into_iter() @@ -688,7 +688,7 @@ impl<'a> fmt::Display for HumanizedAttributes<'a> { if self.config.keys { let keys = self - .attrs + .analyses .keys .as_ref() .expect("keys") @@ -699,12 +699,12 @@ impl<'a> fmt::Display for HumanizedAttributes<'a> { } if self.config.cardinality { - let cardinality = self.attrs.cardinality.as_ref().expect("cardinality"); + let cardinality = self.analyses.cardinality.as_ref().expect("cardinality"); builder.field("cardinality", cardinality); } if self.config.column_names { - let column_names = self.attrs.column_names.as_ref().expect("column_names"); + let column_names = self.analyses.column_names.as_ref().expect("column_names"); let column_names = column_names.into_iter().enumerate().map(|(i, c)| { if c.is_empty() { Cow::Owned(format!("#{i}")) diff --git a/src/transform/src/analysis.rs b/src/transform/src/analysis.rs index 337badbebb926..3bfa5a1dbbe00 100644 --- a/src/transform/src/analysis.rs +++ b/src/transform/src/analysis.rs @@ -1196,16 +1196,16 @@ mod column_names { } mod explain { - //! Derived attributes framework and definitions. + //! Derived Analysis framework and definitions. use std::collections::BTreeMap; use mz_expr::explain::ExplainContext; use mz_expr::MirRelationExpr; use mz_ore::stack::RecursionLimitError; - use mz_repr::explain::{AnnotatedPlan, Attributes}; + use mz_repr::explain::{Analyses, AnnotatedPlan}; - // Attributes should have shortened paths when exported. + // Analyses should have shortened paths when exported. use super::DerivedBuilder; impl<'c> From<&ExplainContext<'c>> for DerivedBuilder<'c> { @@ -1239,19 +1239,19 @@ mod explain { } /// Produce an [`AnnotatedPlan`] wrapping the given [`MirRelationExpr`] along - /// with [`Attributes`] derived from the given context configuration. + /// with [`Analyses`] derived from the given context configuration. pub fn annotate_plan<'a>( plan: &'a MirRelationExpr, context: &'a ExplainContext, ) -> Result, RecursionLimitError> { - let mut annotations = BTreeMap::<&MirRelationExpr, Attributes>::default(); + let mut annotations = BTreeMap::<&MirRelationExpr, Analyses>::default(); let config = context.config; - // We want to annotate the plan with attributes in the following cases: - // 1. An attribute was explicitly requested in the ExplainConfig. + // We want to annotate the plan with analyses in the following cases: + // 1. An Analysis was explicitly requested in the ExplainConfig. // 2. Humanized expressions were requested in the ExplainConfig (in which - // case we need to derive the ColumnNames attribute). - if config.requires_attributes() || config.humanized_exprs { + // case we need to derive the ColumnNames Analysis). + if config.requires_analyses() || config.humanized_exprs { // get the annotation keys let subtree_refs = plan.post_order_vec(); // get the annotation values @@ -1263,8 +1263,8 @@ mod explain { subtree_refs.iter(), derived.results::().unwrap().into_iter(), ) { - let attrs = annotations.entry(expr).or_default(); - attrs.subtree_size = Some(*subtree_size); + let analyses = annotations.entry(expr).or_default(); + analyses.subtree_size = Some(*subtree_size); } } if config.non_negative { @@ -1272,8 +1272,8 @@ mod explain { subtree_refs.iter(), derived.results::().unwrap().into_iter(), ) { - let attrs = annotations.entry(expr).or_default(); - attrs.non_negative = Some(*non_negative); + let analyses = annotations.entry(expr).or_default(); + analyses.non_negative = Some(*non_negative); } } @@ -1282,8 +1282,8 @@ mod explain { subtree_refs.iter(), derived.results::().unwrap().into_iter(), ) { - let attrs = annotations.entry(expr).or_default(); - attrs.arity = Some(*arity); + let analyses = annotations.entry(expr).or_default(); + analyses.arity = Some(*arity); } } @@ -1295,8 +1295,8 @@ mod explain { .unwrap() .into_iter(), ) { - let attrs = annotations.entry(expr).or_default(); - attrs.types = Some(types.clone()); + let analyses = annotations.entry(expr).or_default(); + analyses.types = Some(types.clone()); } } @@ -1305,8 +1305,8 @@ mod explain { subtree_refs.iter(), derived.results::().unwrap().into_iter(), ) { - let attrs = annotations.entry(expr).or_default(); - attrs.keys = Some(keys.clone()); + let analyses = annotations.entry(expr).or_default(); + analyses.keys = Some(keys.clone()); } } @@ -1315,8 +1315,8 @@ mod explain { subtree_refs.iter(), derived.results::().unwrap().into_iter(), ) { - let attrs = annotations.entry(expr).or_default(); - attrs.cardinality = Some(card.to_string()); + let analyses = annotations.entry(expr).or_default(); + analyses.cardinality = Some(card.to_string()); } } @@ -1325,12 +1325,12 @@ mod explain { subtree_refs.iter(), derived.results::().unwrap().into_iter(), ) { - let attrs = annotations.entry(expr).or_default(); + let analyses = annotations.entry(expr).or_default(); let value = column_names .iter() .map(|column_name| column_name.humanize(context.humanizer)) .collect(); - attrs.column_names = Some(value); + analyses.column_names = Some(value); } } } @@ -1339,7 +1339,7 @@ mod explain { } } -/// Definition and helper structs for the [`Cardinality`] attribute. +/// Definition and helper structs for the [`Cardinality`] Analysis. mod cardinality { use std::collections::{BTreeMap, BTreeSet}; From 6dc335eaa0146d6071b6f28479c1068e61701822 Mon Sep 17 00:00:00 2001 From: Gabor Gevay Date: Mon, 2 Dec 2024 19:55:31 +0100 Subject: [PATCH 3/3] Add an EXPLAIN option to show the Equivalences Analysis --- src/repr/src/explain.rs | 13 +++++++ src/sql-lexer/src/keywords.txt | 1 + src/sql-parser/src/ast/defs/statement.rs | 2 + src/sql-parser/tests/testdata/explain | 7 ++++ src/sql/src/plan/statement/dml.rs | 2 + src/transform/src/analysis.rs | 18 +++++++++ src/transform/src/analysis/equivalences.rs | 15 ++++++- src/transform/tests/test_runner.rs | 1 + .../explain/optimized_plan_as_text.slt | 39 +++++++++++++++++++ 9 files changed, 97 insertions(+), 1 deletion(-) diff --git a/src/repr/src/explain.rs b/src/repr/src/explain.rs index 9fa10bbd4eca5..39fe0f75346c3 100644 --- a/src/repr/src/explain.rs +++ b/src/repr/src/explain.rs @@ -170,6 +170,10 @@ pub struct ExplainConfig { pub cardinality: bool, /// Show the `ColumnNames` Analysis. pub column_names: bool, + /// Show the `Equivalences` Analysis. + pub equivalences: bool, + // TODO: add an option to show the `Monotonic` Analysis. This is non-trivial, because this + // Analysis needs the set of monotonic GlobalIds, which are cumbersome to pass around. // Other display options: /// Render implemented MIR `Join` nodes in a way which reflects the implementation. @@ -222,6 +226,7 @@ impl Default for ExplainConfig { subtree_size: false, timing: false, types: false, + equivalences: false, features: Default::default(), } } @@ -236,6 +241,7 @@ impl ExplainConfig { || self.keys || self.cardinality || self.column_names + || self.equivalences } } @@ -629,6 +635,7 @@ pub struct Analyses { pub keys: Option>>, pub cardinality: Option, pub column_names: Option>, + pub equivalences: Option, } #[derive(Debug, Clone)] @@ -716,6 +723,11 @@ impl<'a> Display for HumanizedAnalyses<'a> { builder.field("column_names", &column_names); } + if self.config.equivalences { + let equivs = self.analyses.equivalences.as_ref().expect("equivalences"); + builder.field("equivs", equivs); + } + builder.finish() } } @@ -938,6 +950,7 @@ mod tests { raw_plans: false, raw_syntax: false, subtree_size: false, + equivalences: false, timing: true, types: false, features: Default::default(), diff --git a/src/sql-lexer/src/keywords.txt b/src/sql-lexer/src/keywords.txt index 958083e6e39e9..21d848f64464f 100644 --- a/src/sql-lexer/src/keywords.txt +++ b/src/sql-lexer/src/keywords.txt @@ -151,6 +151,7 @@ End Endpoint Enforced Envelope +Equivalences Error Errors Escape diff --git a/src/sql-parser/src/ast/defs/statement.rs b/src/sql-parser/src/ast/defs/statement.rs index 4453aa7a04d46..4452d9f1d3543 100644 --- a/src/sql-parser/src/ast/defs/statement.rs +++ b/src/sql-parser/src/ast/defs/statement.rs @@ -3883,6 +3883,7 @@ pub enum ExplainPlanOptionName { SubtreeSize, Timing, Types, + Equivalences, ReoptimizeImportedViews, EnableNewOuterJoinLowering, EnableEagerDeltaJoins, @@ -3917,6 +3918,7 @@ impl WithOptionName for ExplainPlanOptionName { | Self::SubtreeSize | Self::Timing | Self::Types + | Self::Equivalences | Self::ReoptimizeImportedViews | Self::EnableNewOuterJoinLowering | Self::EnableEagerDeltaJoins diff --git a/src/sql-parser/tests/testdata/explain b/src/sql-parser/tests/testdata/explain index 295568931c86e..ab4a27ae87833 100644 --- a/src/sql-parser/tests/testdata/explain +++ b/src/sql-parser/tests/testdata/explain @@ -305,3 +305,10 @@ EXPLAIN FILTER PUSHDOWN FOR MATERIALIZED VIEW whatever EXPLAIN FILTER PUSHDOWN FOR MATERIALIZED VIEW whatever => ExplainPushdown(ExplainPushdownStatement { explainee: MaterializedView(Name(UnresolvedItemName([Ident("whatever")]))) }) + +parse-statement +EXPLAIN WITH (ARITY, EQUIVALENCES, HUMANIZED EXPRESSIONS) CREATE MATERIALIZED VIEW mv AS SELECT 665 +---- +EXPLAIN WITH (ARITY, EQUIVALENCES, HUMANIZED EXPRESSIONS) CREATE MATERIALIZED VIEW mv AS SELECT 665 +=> +ExplainPlan(ExplainPlanStatement { stage: None, with_options: [ExplainPlanOption { name: Arity, value: None }, ExplainPlanOption { name: Equivalences, value: None }, ExplainPlanOption { name: HumanizedExpressions, value: None }], format: None, explainee: CreateMaterializedView(CreateMaterializedViewStatement { if_exists: Error, name: UnresolvedItemName([Ident("mv")]), columns: [], in_cluster: None, query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Value(Number("665")), alias: None }], from: [], selection: None, group_by: [], having: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None, with_options: [] }, false) }) diff --git a/src/sql/src/plan/statement/dml.rs b/src/sql/src/plan/statement/dml.rs index f5c49e50597cf..9ee68720d7a22 100644 --- a/src/sql/src/plan/statement/dml.rs +++ b/src/sql/src/plan/statement/dml.rs @@ -364,6 +364,7 @@ generate_extracted_config!( (SubtreeSize, bool, Default(false)), (Timing, bool, Default(false)), (Types, bool, Default(false)), + (Equivalences, bool, Default(false)), (ReoptimizeImportedViews, Option, Default(None)), (EnableNewOuterJoinLowering, Option, Default(None)), (EnableEagerDeltaJoins, Option, Default(None)), @@ -406,6 +407,7 @@ impl TryFrom for ExplainConfig { raw_syntax: v.raw_syntax, redacted: v.redacted, subtree_size: v.subtree_size, + equivalences: v.equivalences, timing: v.timing, types: v.types, // The ones that are initialized with `Default::default()` are not wired up to EXPLAIN. diff --git a/src/transform/src/analysis.rs b/src/transform/src/analysis.rs index 3bfa5a1dbbe00..e4e50c9e18b02 100644 --- a/src/transform/src/analysis.rs +++ b/src/transform/src/analysis.rs @@ -1205,6 +1205,8 @@ mod explain { use mz_ore::stack::RecursionLimitError; use mz_repr::explain::{Analyses, AnnotatedPlan}; + use crate::analysis::equivalences::Equivalences; + // Analyses should have shortened paths when exported. use super::DerivedBuilder; @@ -1234,6 +1236,9 @@ mod explain { if context.config.column_names || context.config.humanized_exprs { builder.require(super::ColumnNames); } + if context.config.equivalences { + builder.require(Equivalences); + } builder } } @@ -1333,6 +1338,19 @@ mod explain { analyses.column_names = Some(value); } } + + if config.equivalences { + for (expr, equivs) in std::iter::zip( + subtree_refs.iter(), + derived.results::().unwrap().into_iter(), + ) { + let analyses = annotations.entry(expr).or_default(); + analyses.equivalences = Some(match equivs.as_ref() { + Some(equivs) => equivs.to_string(), + None => "".to_string(), + }); + } + } } Ok(AnnotatedPlan { plan, annotations }) diff --git a/src/transform/src/analysis/equivalences.rs b/src/transform/src/analysis/equivalences.rs index de564820d36b9..2fefb92cf4333 100644 --- a/src/transform/src/analysis/equivalences.rs +++ b/src/transform/src/analysis/equivalences.rs @@ -16,8 +16,10 @@ //! equivalences classes, each a list of equivalent expressions. use std::collections::BTreeMap; +use std::fmt::Formatter; use mz_expr::{Id, MirRelationExpr, MirScalarExpr}; +use mz_ore::str::{bracketed, separated}; use mz_repr::{ColumnType, Datum}; use crate::analysis::{Analysis, Lattice}; @@ -318,7 +320,7 @@ pub struct EquivalenceClasses { /// The first element should be the "canonical" simplest element, that any other element /// can be replaced by. /// These classes are unified whenever possible, to minimize the number of classes. - /// They are only guaranteed to form an equivalence relation after a call to `minimimize`, + /// They are only guaranteed to form an equivalence relation after a call to `minimize`, /// which refreshes both `self.classes` and `self.remap`. pub classes: Vec>, @@ -334,6 +336,17 @@ pub struct EquivalenceClasses { remap: BTreeMap, } +impl std::fmt::Display for EquivalenceClasses { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + // Only show `classes`. + let classes = self + .classes + .iter() + .map(|class| format!("{}", bracketed("[", "]", separated(", ", class)))); + write!(f, "{}", bracketed("[", "]", separated(", ", classes))) + } +} + impl EquivalenceClasses { /// Comparator function for the complexity of scalar expressions. Simpler expressions are /// smaller. Can be used when we need to decide which of several equivalent expressions to use. diff --git a/src/transform/tests/test_runner.rs b/src/transform/tests/test_runner.rs index 1460ef34c64b6..53bc76efa8798 100644 --- a/src/transform/tests/test_runner.rs +++ b/src/transform/tests/test_runner.rs @@ -141,6 +141,7 @@ mod tests { raw_plans: false, raw_syntax: false, subtree_size: false, + equivalences: false, timing: false, types: format_contains("types"), ..ExplainConfig::default() diff --git a/test/sqllogictest/explain/optimized_plan_as_text.slt b/test/sqllogictest/explain/optimized_plan_as_text.slt index 879af6203f45e..27184c7e8b765 100644 --- a/test/sqllogictest/explain/optimized_plan_as_text.slt +++ b/test/sqllogictest/explain/optimized_plan_as_text.slt @@ -1577,3 +1577,42 @@ Source materialize.public.t4 Target cluster: no_replicas EOF + +statement ok +CREATE TABLE t5( + x int, + y int NOT NULL, + z int +); + +statement ok +CREATE TABLE t6( + a int NOT NULL, + b int +); + +# WITH(EQUIVALENCES) +query T multiline +EXPLAIN WITH(EQUIVALENCES) +SELECT * +FROM t5, t6 +WHERE x = a AND b IN (8,9); +---- +Explained Query: + Project (#0..=#2, #0, #4) // { equivs: "[[#0, #3], [false, (#0) IS NULL, (#1) IS NULL], [true, ((#4 = 8) OR (#4 = 9))]]" } + Join on=(#0 = #3) type=differential // { equivs: "[[#0, #3], [false, (#0) IS NULL, (#1) IS NULL], [true, ((#4 = 8) OR (#4 = 9))]]" } + ArrangeBy keys=[[#0]] // { equivs: "[[false, (#0) IS NULL, (#1) IS NULL]]" } + Filter (#0) IS NOT NULL // { equivs: "[[false, (#0) IS NULL, (#1) IS NULL]]" } + ReadStorage materialize.public.t5 // { equivs: "[[false, (#1) IS NULL]]" } + ArrangeBy keys=[[#0]] // { equivs: "[[false, (#0) IS NULL], [true, ((#1 = 8) OR (#1 = 9))]]" } + Filter ((#1 = 8) OR (#1 = 9)) // { equivs: "[[false, (#0) IS NULL], [true, ((#1 = 8) OR (#1 = 9))]]" } + ReadStorage materialize.public.t6 // { equivs: "[[false, (#0) IS NULL]]" } + +Source materialize.public.t5 + filter=((#0) IS NOT NULL) +Source materialize.public.t6 + filter=(((#1 = 8) OR (#1 = 9))) + +Target cluster: no_replicas + +EOF