From 73dbd9148f2a6543e8d3ed0d64814057649014c3 Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Tue, 28 Jan 2025 11:33:07 +0100 Subject: [PATCH] BigQuery: Add support for select expr star (#1680) --- src/ast/mod.rs | 13 +++--- src/ast/query.rs | 34 +++++++++++++-- src/ast/spans.rs | 49 +++++++-------------- src/dialect/bigquery.rs | 5 +++ src/dialect/mod.rs | 11 +++++ src/parser/mod.rs | 30 ++++++++++--- tests/sqlparser_bigquery.rs | 11 +++-- tests/sqlparser_common.rs | 85 +++++++++++++++++++++++++++++++++++- tests/sqlparser_duckdb.rs | 2 +- tests/sqlparser_snowflake.rs | 4 +- 10 files changed, 185 insertions(+), 59 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 491f3c7ee..0d711a2f1 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -68,12 +68,13 @@ pub use self::query::{ NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OpenJsonTableColumn, OrderBy, OrderByExpr, PivotValueSource, ProjectionSelect, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, - SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, - TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableIndexHintForClause, - TableIndexHintType, TableIndexHints, TableIndexType, TableSample, TableSampleBucket, - TableSampleKind, TableSampleMethod, TableSampleModifier, TableSampleQuantity, TableSampleSeed, - TableSampleSeedModifier, TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, - UpdateTableFromKind, ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, + SelectInto, SelectItem, SelectItemQualifiedWildcardKind, SetExpr, SetOperator, SetQuantifier, + Setting, SymbolDefinition, Table, TableAlias, TableAliasColumnDef, TableFactor, + TableFunctionArgs, TableIndexHintForClause, TableIndexHintType, TableIndexHints, + TableIndexType, TableSample, TableSampleBucket, TableSampleKind, TableSampleMethod, + TableSampleModifier, TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, + TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, UpdateTableFromKind, + ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, }; pub use self::trigger::{ diff --git a/src/ast/query.rs b/src/ast/query.rs index 09058f760..747d925c9 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -586,6 +586,20 @@ impl fmt::Display for Cte { } } +/// Represents an expression behind a wildcard expansion in a projection. +/// `SELECT T.* FROM T; +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum SelectItemQualifiedWildcardKind { + /// Expression is an object name. + /// e.g. `alias.*` or even `schema.table.*` + ObjectName(ObjectName), + /// Select star on an arbitrary expression. + /// e.g. `STRUCT('foo').*` + Expr(Expr), +} + /// One item of the comma-separated list following `SELECT` #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -595,12 +609,24 @@ pub enum SelectItem { UnnamedExpr(Expr), /// An expression, followed by `[ AS ] alias` ExprWithAlias { expr: Expr, alias: Ident }, - /// `alias.*` or even `schema.table.*` - QualifiedWildcard(ObjectName, WildcardAdditionalOptions), + /// An expression, followed by a wildcard expansion. + /// e.g. `alias.*`, `STRUCT('foo').*` + QualifiedWildcard(SelectItemQualifiedWildcardKind, WildcardAdditionalOptions), /// An unqualified `*` Wildcard(WildcardAdditionalOptions), } +impl fmt::Display for SelectItemQualifiedWildcardKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match &self { + SelectItemQualifiedWildcardKind::ObjectName(object_name) => { + write!(f, "{object_name}.*") + } + SelectItemQualifiedWildcardKind::Expr(expr) => write!(f, "{expr}.*"), + } + } +} + /// Single aliased identifier /// /// # Syntax @@ -867,8 +893,8 @@ impl fmt::Display for SelectItem { match &self { SelectItem::UnnamedExpr(expr) => write!(f, "{expr}"), SelectItem::ExprWithAlias { expr, alias } => write!(f, "{expr} AS {alias}"), - SelectItem::QualifiedWildcard(prefix, additional_options) => { - write!(f, "{prefix}.*")?; + SelectItem::QualifiedWildcard(kind, additional_options) => { + write!(f, "{kind}")?; write!(f, "{additional_options}")?; Ok(()) } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 44821ffbc..58fa27aa8 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -15,9 +15,11 @@ // specific language governing permissions and limitations // under the License. -use crate::tokenizer::Span; +use crate::ast::query::SelectItemQualifiedWildcardKind; use core::iter; +use crate::tokenizer::Span; + use super::{ dcl::SecondaryRoles, AccessExpr, AlterColumnOperation, AlterIndexOperation, AlterTableOperation, Array, Assignment, AssignmentTarget, CloseCursor, ClusteredIndex, @@ -383,33 +385,6 @@ impl Spanned for Statement { .chain(to.iter().map(|i| i.span())), ), Statement::CreateTable(create_table) => create_table.span(), - Statement::CreateIcebergTable { - or_replace: _, - if_not_exists: _, - name, - columns, - constraints, - with_options, - comment: _, - cluster_by: _, - external_volume: _, - catalog: _, - base_location: _, - catalog_sync: _, - storage_serialization_policy: _, - data_retention_time_in_days: _, - max_data_extension_time_in_days: _, - change_tracking: _, - copy_grants: _, - with_row_access_policy: _, - with_aggregation_policy: _, - with_tags: _, - } => union_spans( - core::iter::once(name.span()) - .chain(columns.iter().map(|i| i.span())) - .chain(constraints.iter().map(|i| i.span())) - .chain(with_options.iter().map(|i| i.span())), - ), Statement::CreateVirtualTable { name, if_not_exists: _, @@ -515,7 +490,6 @@ impl Spanned for Statement { Statement::DropPolicy { .. } => Span::empty(), Statement::ShowDatabases { .. } => Span::empty(), Statement::ShowSchemas { .. } => Span::empty(), - Statement::ShowObjects { .. } => Span::empty(), Statement::ShowViews { .. } => Span::empty(), Statement::LISTEN { .. } => Span::empty(), Statement::NOTIFY { .. } => Span::empty(), @@ -1650,16 +1624,23 @@ impl Spanned for JsonPathElem { } } +impl Spanned for SelectItemQualifiedWildcardKind { + fn span(&self) -> Span { + match self { + SelectItemQualifiedWildcardKind::ObjectName(object_name) => object_name.span(), + SelectItemQualifiedWildcardKind::Expr(expr) => expr.span(), + } + } +} + impl Spanned for SelectItem { fn span(&self) -> Span { match self { SelectItem::UnnamedExpr(expr) => expr.span(), SelectItem::ExprWithAlias { expr, alias } => expr.span().union(&alias.span), - SelectItem::QualifiedWildcard(object_name, wildcard_additional_options) => union_spans( - object_name - .0 - .iter() - .map(|i| i.span()) + SelectItem::QualifiedWildcard(kind, wildcard_additional_options) => union_spans( + [kind.span()] + .into_iter() .chain(iter::once(wildcard_additional_options.span())), ), SelectItem::Wildcard(wildcard_additional_options) => wildcard_additional_options.span(), diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index 716174391..b45754213 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -83,6 +83,11 @@ impl Dialect for BigQueryDialect { true } + /// See + fn supports_select_expr_star(&self) -> bool { + true + } + // See fn supports_timestamp_versioning(&self) -> bool { true diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 6329c5cfc..bc3c0c967 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -447,6 +447,17 @@ pub trait Dialect: Debug + Any { false } + /// Return true if the dialect supports wildcard expansion on + /// arbitrary expressions in projections. + /// + /// Example: + /// ```sql + /// SELECT STRUCT('foo').* FROM T + /// ``` + fn supports_select_expr_star(&self) -> bool { + false + } + /// Does the dialect support MySQL-style `'user'@'host'` grantee syntax? fn supports_user_host_grantee(&self) -> bool { false diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f31366946..7e6e861d6 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1528,10 +1528,17 @@ impl<'a> Parser<'a> { // function array_agg traverses this control flow if dialect_of!(self is PostgreSqlDialect) { ending_wildcard = Some(next_token); - break; } else { - return self.expected("an identifier after '.'", next_token); + // Put back the consumed .* tokens before exiting. + // If this expression is being parsed in the + // context of a projection, then this could imply + // a wildcard expansion. For example: + // `SELECT STRUCT('foo').* FROM T` + self.prev_token(); // * + self.prev_token(); // . } + + break; } Token::SingleQuotedString(s) => { let expr = Expr::Identifier(Ident::with_quote('\'', s)); @@ -1568,18 +1575,18 @@ impl<'a> Parser<'a> { } else { self.parse_function(ObjectName::from(id_parts)) } + } else if chain.is_empty() { + Ok(root) } else { if Self::is_all_ident(&root, &chain) { return Ok(Expr::CompoundIdentifier(Self::exprs_to_idents( root, chain, )?)); } - if chain.is_empty() { - return Ok(root); - } + Ok(Expr::CompoundFieldAccess { root: Box::new(root), - access_chain: chain.clone(), + access_chain: chain, }) } } @@ -12935,7 +12942,7 @@ impl<'a> Parser<'a> { pub fn parse_select_item(&mut self) -> Result { match self.parse_wildcard_expr()? { Expr::QualifiedWildcard(prefix, token) => Ok(SelectItem::QualifiedWildcard( - prefix, + SelectItemQualifiedWildcardKind::ObjectName(prefix), self.parse_wildcard_additional_options(token.0)?, )), Expr::Wildcard(token) => Ok(SelectItem::Wildcard( @@ -12965,6 +12972,15 @@ impl<'a> Parser<'a> { alias, }) } + expr if self.dialect.supports_select_expr_star() + && self.consume_tokens(&[Token::Period, Token::Mul]) => + { + let wildcard_token = self.get_previous_token().clone(); + Ok(SelectItem::QualifiedWildcard( + SelectItemQualifiedWildcardKind::Expr(expr), + self.parse_wildcard_additional_options(wildcard_token)?, + )) + } expr => self .maybe_parse_select_item_alias() .map(|alias| match alias { diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 45d87a8b8..921a37a8a 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1540,9 +1540,6 @@ fn parse_hyphenated_table_identifiers() { ])) }) ); - - let error_sql = "select foo-bar.* from foo-bar"; - assert!(bigquery().parse_sql_statements(error_sql).is_err()); } #[test] @@ -2204,6 +2201,14 @@ fn parse_extract_weekday() { ); } +#[test] +fn bigquery_select_expr_star() { + bigquery() + .verified_only_select("SELECT STRUCT((SELECT foo FROM T WHERE true)).* FROM T"); + bigquery().verified_only_select("SELECT [STRUCT('foo')][0].* EXCEPT (foo) FROM T"); + bigquery().verified_only_select("SELECT myfunc()[0].* FROM T"); +} + #[test] fn test_select_as_struct() { bigquery().verified_only_select("SELECT * FROM (SELECT AS VALUE STRUCT(123 AS a, false AS b))"); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 2489ce2d9..5dd74e1fa 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1002,7 +1002,7 @@ fn parse_select_wildcard() { let select = verified_only_select(sql); assert_eq!( &SelectItem::QualifiedWildcard( - ObjectName::from(vec![Ident::new("foo")]), + SelectItemQualifiedWildcardKind::ObjectName(ObjectName::from(vec![Ident::new("foo")])), WildcardAdditionalOptions::default() ), only(&select.projection) @@ -1012,7 +1012,10 @@ fn parse_select_wildcard() { let select = verified_only_select(sql); assert_eq!( &SelectItem::QualifiedWildcard( - ObjectName::from(vec![Ident::new("myschema"), Ident::new("mytable"),]), + SelectItemQualifiedWildcardKind::ObjectName(ObjectName::from(vec![ + Ident::new("myschema"), + Ident::new("mytable"), + ])), WildcardAdditionalOptions::default(), ), only(&select.projection) @@ -1057,6 +1060,84 @@ fn parse_column_aliases() { one_statement_parses_to("SELECT a.col + 1 newname FROM foo AS a", sql); } +#[test] +fn parse_select_expr_star() { + let dialects = all_dialects_where(|d| d.supports_select_expr_star()); + + // Identifier wildcard expansion. + let select = dialects.verified_only_select("SELECT foo.bar.* FROM T"); + let SelectItem::QualifiedWildcard(SelectItemQualifiedWildcardKind::ObjectName(object_name), _) = + only(&select.projection) + else { + unreachable!( + "expected wildcard select item: got {:?}", + &select.projection[0] + ) + }; + assert_eq!( + &ObjectName::from( + ["foo", "bar"] + .into_iter() + .map(Ident::new) + .collect::>() + ), + object_name + ); + + // Arbitrary compound expression with wildcard expansion. + let select = dialects.verified_only_select("SELECT foo - bar.* FROM T"); + let SelectItem::QualifiedWildcard( + SelectItemQualifiedWildcardKind::Expr(Expr::BinaryOp { left, op, right }), + _, + ) = only(&select.projection) + else { + unreachable!( + "expected wildcard select item: got {:?}", + &select.projection[0] + ) + }; + let (Expr::Identifier(left), BinaryOperator::Minus, Expr::Identifier(right)) = + (left.as_ref(), op, right.as_ref()) + else { + unreachable!("expected binary op expr: got {:?}", &select.projection[0]) + }; + assert_eq!(&Ident::new("foo"), left); + assert_eq!(&Ident::new("bar"), right); + + // Arbitrary expression wildcard expansion. + let select = dialects.verified_only_select("SELECT myfunc().foo.* FROM T"); + let SelectItem::QualifiedWildcard( + SelectItemQualifiedWildcardKind::Expr(Expr::CompoundFieldAccess { root, access_chain }), + _, + ) = only(&select.projection) + else { + unreachable!("expected wildcard expr: got {:?}", &select.projection[0]) + }; + assert!(matches!(root.as_ref(), Expr::Function(_))); + assert_eq!(1, access_chain.len()); + assert!(matches!( + &access_chain[0], + AccessExpr::Dot(Expr::Identifier(_)) + )); + + dialects.one_statement_parses_to( + "SELECT 2. * 3 FROM T", + #[cfg(feature = "bigdecimal")] + "SELECT 2 * 3 FROM T", + #[cfg(not(feature = "bigdecimal"))] + "SELECT 2. * 3 FROM T", + ); + dialects.verified_only_select("SELECT myfunc().* FROM T"); + dialects.verified_only_select("SELECT myfunc().* EXCEPT (foo) FROM T"); + + // Invalid + let res = dialects.parse_sql_statements("SELECT foo.*.* FROM T"); + assert_eq!( + ParserError::ParserError("Expected: end of statement, found: .".to_string()), + res.unwrap_err() + ); +} + #[test] fn test_eof_after_as() { let res = parse_sql_statements("SELECT foo AS"); diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index aee6d654c..4289ebd1f 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -160,7 +160,7 @@ fn test_select_wildcard_with_exclude() { let select = duckdb().verified_only_select("SELECT name.* EXCLUDE department_id FROM employee_table"); let expected = SelectItem::QualifiedWildcard( - ObjectName::from(vec![Ident::new("name")]), + SelectItemQualifiedWildcardKind::ObjectName(ObjectName::from(vec![Ident::new("name")])), WildcardAdditionalOptions { opt_exclude: Some(ExcludeSelectItem::Single(Ident::new("department_id"))), ..Default::default() diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 2b2350936..f8b58dd15 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1368,7 +1368,7 @@ fn test_select_wildcard_with_exclude() { let select = snowflake_and_generic() .verified_only_select("SELECT name.* EXCLUDE department_id FROM employee_table"); let expected = SelectItem::QualifiedWildcard( - ObjectName::from(vec![Ident::new("name")]), + SelectItemQualifiedWildcardKind::ObjectName(ObjectName::from(vec![Ident::new("name")])), WildcardAdditionalOptions { opt_exclude: Some(ExcludeSelectItem::Single(Ident::new("department_id"))), ..Default::default() @@ -1405,7 +1405,7 @@ fn test_select_wildcard_with_rename() { "SELECT name.* RENAME (department_id AS new_dep, employee_id AS new_emp) FROM employee_table", ); let expected = SelectItem::QualifiedWildcard( - ObjectName::from(vec![Ident::new("name")]), + SelectItemQualifiedWildcardKind::ObjectName(ObjectName::from(vec![Ident::new("name")])), WildcardAdditionalOptions { opt_rename: Some(RenameSelectItem::Multiple(vec![ IdentWithAlias {