diff --git a/zetasql/parser/ast_node_kind.h b/zetasql/parser/ast_node_kind.h index 7f05b8090..8517ef689 100755 --- a/zetasql/parser/ast_node_kind.h +++ b/zetasql/parser/ast_node_kind.h @@ -317,6 +317,7 @@ enum ASTNodeKind { AST_SHOW_TARGET_EXPRESSION, AST_DEPLOY_STATEMENT, AST_LOAD_DATA_STATEMENT, + AST_SELECT_INTO_STATEMENT, AST_WITH_WEIGHT, kLastASTNodeKind = AST_WITH_WEIGHT }; diff --git a/zetasql/parser/bison_parser.y b/zetasql/parser/bison_parser.y index 6b01141e9..830de3b87 100644 --- a/zetasql/parser/bison_parser.y +++ b/zetasql/parser/bison_parser.y @@ -872,6 +872,7 @@ using zetasql::ASTDropStatement; %token KW_ONLY "ONLY" %token KW_OPTIONS "OPTIONS" %token KW_OUT "OUT" +%token KW_OUTFILE "OUTFILE" %token KW_PERCENT "PERCENT" %token KW_PIVOT "PIVOT" %token KW_POLICIES "POLICIES" @@ -1079,6 +1080,8 @@ using zetasql::ASTDropStatement; %type import_statement %type load_statement %type load_data_statement +%type into_statement /* NOTE(review): no rule named into_statement is visible in this patch; confirm it is defined elsewhere or drop this declaration. */ +%type select_into_statement %type variable_declaration %type opt_default_expression %type identifier_list @@ -1587,6 +1590,7 @@ sql_statement_body: | use_statement | deploy_statement | load_statement + | select_into_statement ; query_statement: @@ -3390,6 +3394,13 @@ load_statement: } ; +select_into_statement: /* Parses: query INTO OUTFILE string_literal opt_options_list. The resulting ASTSelectIntoStatement gets the query (1st symbol), the string literal (4th) and opt_options_list (5th) as children. */ + query "INTO" "OUTFILE" string_literal opt_options_list + { + $$ = MAKE_NODE(ASTSelectIntoStatement, @$, {$1, $4, $5}); /* terminate the statement explicitly rather than relying on the macro expansion */ + } + ; + load_data_statement: "LOAD" "DATA" "INFILE" string_literal "INTO" "TABLE" path_expression opt_options_list { @@ -7353,6 +7364,7 @@ keyword_as_identifier: | "ONLY" | "OPTIONS" | "OUT" + | "OUTFILE" | "PERCENT" | "PIVOT" | "POLICIES" diff --git a/zetasql/parser/flex_tokenizer.l b/zetasql/parser/flex_tokenizer.l index 6a6c5e2de..5bfcd8481 100644 --- a/zetasql/parser/flex_tokenizer.l +++ b/zetasql/parser/flex_tokenizer.l @@ -560,6 +560,7 @@ options { return BisonParserImpl::token::KW_OPTIONS; } or { return BisonParserImpl::token::KW_OR; } order { return BisonParserImpl::token::KW_ORDER; } out { return 
BisonParserImpl::token::KW_OUT; } +outfile { /* OUTFILE keyword token, consumed by the query INTO OUTFILE grammar rule. */ return BisonParserImpl::token::KW_OUTFILE; } outer { return BisonParserImpl::token::KW_OUTER; } over { return BisonParserImpl::token::KW_OVER; } partition { return BisonParserImpl::token::KW_PARTITION; } diff --git a/zetasql/parser/keywords.cc b/zetasql/parser/keywords.cc index de5d88b49..167ac87db 100644 --- a/zetasql/parser/keywords.cc +++ b/zetasql/parser/keywords.cc @@ -215,6 +215,7 @@ constexpr KeywordInfoPOD kAllKeywords[] = { {"or", KW_OR, KeywordInfo::kReserved}, {"order", KW_ORDER, KeywordInfo::kReserved}, {"out", KW_OUT}, + {"outfile", KW_OUTFILE}, {"outer", KW_OUTER, KeywordInfo::kReserved}, {"over", KW_OVER, KeywordInfo::kReserved}, {"partition", KW_PARTITION, KeywordInfo::kReserved}, diff --git a/zetasql/parser/parse_tree.cc b/zetasql/parser/parse_tree.cc index 87fd2d3b1..c3d0dd9a4 100644 --- a/zetasql/parser/parse_tree.cc +++ b/zetasql/parser/parse_tree.cc @@ -352,6 +352,7 @@ static absl::flat_hash_map CreateNodeNamesMap() { map[AST_SHOW_TARGET_EXPRESSION] = "ShowTargetExpression"; map[AST_DEPLOY_STATEMENT] = "DeployStatement"; map[AST_LOAD_DATA_STATEMENT] = "LoadDataStatement"; + map[AST_SELECT_INTO_STATEMENT] = "SelectIntoStatement"; map[AST_WITH_WEIGHT] = "WithWeight"; map[AST_WITH_PARTITION_COLUMNS_CLAUSE] = "WithPartitionColumnsClause"; for (int kind = kFirstASTNodeKind; kind <= kLastASTNodeKind; diff --git a/zetasql/parser/parse_tree_manual.h b/zetasql/parser/parse_tree_manual.h index e8bb8ec26..190000762 100644 --- a/zetasql/parser/parse_tree_manual.h +++ b/zetasql/parser/parse_tree_manual.h @@ -737,6 +737,28 @@ class ASTImportStatement final : public ASTStatement { const ASTOptionsList* options_list_ = nullptr; // May be NULL. 
}; +class ASTSelectIntoStatement final : public ASTStatement { /* Statement node of kind AST_SELECT_INTO_STATEMENT, produced for: query INTO OUTFILE string_literal opt_options_list. */ + public: + static constexpr ASTNodeKind kConcreteNodeKind = AST_SELECT_INTO_STATEMENT; + explicit ASTSelectIntoStatement() : ASTStatement(kConcreteNodeKind) {} + void Accept(ParseTreeVisitor* visitor, void* data) const override; + zetasql_base::StatusOr Accept( + NonRecursiveParseTreeVisitor* visitor) const override; + const ASTQuery* query() const { return query_; } /* The query that precedes INTO OUTFILE (registered as required). */ + const ASTStringLiteral* out_file() const { return out_file_; } /* The string literal that follows INTO OUTFILE (registered as required). */ + const ASTOptionsList* options_list() const { return options_list_; } /* Registered as optional; initialised to nullptr. */ +private: + void InitFields() final { /* Registers the child slots in order: query, out-file literal, then the optional options list. */ + FieldLoader fl(this); + fl.AddRequired(&query_); + fl.AddRequired(&out_file_); + fl.AddOptional(&options_list_, AST_OPTIONS_LIST); + } + + const ASTQuery* query_= nullptr; + const ASTStringLiteral* out_file_ = nullptr; + const ASTOptionsList* options_list_ = nullptr; +}; // super class of all load statements class ASTLoadStatement : public ASTStatement { public: diff --git a/zetasql/parser/run_parser_test.cc b/zetasql/parser/run_parser_test.cc index 921fd15fe..d6471b848 100644 --- a/zetasql/parser/run_parser_test.cc +++ b/zetasql/parser/run_parser_test.cc @@ -677,12 +677,22 @@ class RunParserTest : public ::testing::Test { // The NodeKind on the AST should match that in the extracted properties. ASTNodeKind found_statement_kind = statement->node_kind(); - EXPECT_EQ(found_statement_kind, extracted_statement_properties.node_kind); - if (found_statement_kind != extracted_statement_properties.node_kind) { - test_outputs->push_back(absl::StrCat( - "FAILED guessing statement kind. 
Extracted kind ", - ASTNode::NodeKindToString(extracted_statement_properties.node_kind), - ", got ", ASTNode::NodeKindToString(found_statement_kind))); + if (found_statement_kind == AST_SELECT_INTO_STATEMENT) { + EXPECT_EQ(AST_QUERY_STATEMENT, extracted_statement_properties.node_kind); + if (AST_QUERY_STATEMENT != extracted_statement_properties.node_kind) { + test_outputs->push_back(absl::StrCat( + "FAILED guessing statement kind. Extracted kind ", + ASTNode::NodeKindToString(extracted_statement_properties.node_kind), + ", got ", ASTNode::NodeKindToString(AST_QUERY_STATEMENT))); + } + } else { + EXPECT_EQ(found_statement_kind, extracted_statement_properties.node_kind); + if (found_statement_kind != extracted_statement_properties.node_kind) { + test_outputs->push_back(absl::StrCat( + "FAILED guessing statement kind. Extracted kind ", + ASTNode::NodeKindToString(extracted_statement_properties.node_kind), + ", got ", ASTNode::NodeKindToString(found_statement_kind))); + } } // The CREATE scope on the AST should match that in the extracted diff --git a/zetasql/parser/testdata/select_into.test b/zetasql/parser/testdata/select_into.test new file mode 100644 index 000000000..a45b4330c --- /dev/null +++ b/zetasql/parser/testdata/select_into.test @@ -0,0 +1,100 @@ +select col1, col2 from t1 into outfile 'data.csv'; +-- +SelectIntoStatement [0-49] + Query [0-25] + Select [0-25] + SelectList [7-17] + SelectColumn [7-11] + PathExpression [7-11] + Identifier(col1) [7-11] + SelectColumn [13-17] + PathExpression [13-17] + Identifier(col2) [13-17] + FromClause [18-25] + TablePathExpression [23-25] + PathExpression [23-25] + Identifier(t1) [23-25] + StringLiteral('data.csv') [39-49] +-- +SELECT + col1, + col2 +FROM + t1 +INTO OUTFILE 'data.csv' +== + +select col1, col2 from t1 into outfile 'data.csv' options (charset = 'utf-8'); +-- +SelectIntoStatement [0-77] + Query [0-25] + Select [0-25] + SelectList [7-17] + SelectColumn [7-11] + PathExpression [7-11] + Identifier(col1) [7-11] + 
SelectColumn [13-17] + PathExpression [13-17] + Identifier(col2) [13-17] + FromClause [18-25] + TablePathExpression [23-25] + PathExpression [23-25] + Identifier(t1) [23-25] + StringLiteral('data.csv') [39-49] + OptionsList [58-77] + OptionsEntry [59-76] + Identifier(charset) [59-66] + StringLiteral('utf-8') [69-76] +-- +SELECT + col1, + col2 +FROM + t1 +INTO OUTFILE 'data.csv' OPTIONS(charset = 'utf-8') +== + +select col1, col2 from db1.t1 into outfile 'data.csv' options (charset = 'utf-8'); +-- +SelectIntoStatement [0-81] + Query [0-29] + Select [0-29] + SelectList [7-17] + SelectColumn [7-11] + PathExpression [7-11] + Identifier(col1) [7-11] + SelectColumn [13-17] + PathExpression [13-17] + Identifier(col2) [13-17] + FromClause [18-29] + TablePathExpression [23-29] + PathExpression [23-29] + Identifier(db1) [23-26] + Identifier(t1) [27-29] + StringLiteral('data.csv') [43-53] + OptionsList [62-81] + OptionsEntry [63-80] + Identifier(charset) [63-70] + StringLiteral('utf-8') [73-80] +-- +SELECT + col1, + col2 +FROM + db1.t1 +INTO OUTFILE 'data.csv' OPTIONS(charset = 'utf-8') +== + +select col1, col2 from db1.t1 into outfile; +-- +ERROR: Syntax error: Expected string literal but got ";" [at 1:43] +select col1, col2 from db1.t1 into outfile; + ^ +== + +select col1, col2 from db1.t1 into outfile options (charset = 'utf-8'); +-- +ERROR: Syntax error: Expected string literal but got keyword OPTIONS [at 1:44] +select col1, col2 from db1.t1 into outfile options (charset = 'utf-8'); + ^ +== diff --git a/zetasql/parser/testdata/set.test b/zetasql/parser/testdata/set.test index 0df2433a0..2586e3f72 100644 --- a/zetasql/parser/testdata/set.test +++ b/zetasql/parser/testdata/set.test @@ -7,6 +7,15 @@ SingleAssignment [0-7] SET a = 3 == +SET SELECT_MODE = 'TRINO' +-- +SingleAssignment [0-25] + Identifier(SELECT_MODE) [4-15] + StringLiteral('TRINO') [18-25] +-- +SET SELECT_MODE = 'TRINO' +== + set a = (((a + 1) + 1) + 1) -- SingleAssignment [0-27] diff --git 
a/zetasql/parser/unparser.cc b/zetasql/parser/unparser.cc index 0f63671d5..65007b41d 100644 --- a/zetasql/parser/unparser.cc +++ b/zetasql/parser/unparser.cc @@ -1086,6 +1086,15 @@ void Unparser::visitASTLoadDataStatement(const ASTLoadDataStatement* node, void* } } +void Unparser::visitASTSelectIntoStatement(const ASTSelectIntoStatement* node, void* data) { /* Unparses the node in source order: the query, then INTO OUTFILE and the out-file literal, then OPTIONS and the options list when one is attached. */ + node->query()->Accept(this, data); + print("INTO OUTFILE"); + node->out_file()->Accept(this, data); + if (node->options_list() != nullptr) { /* The options list is the only child that may be absent. */ + print("OPTIONS"); + node->options_list()->Accept(this, data); + } +} void Unparser::visitASTModuleStatement(const ASTModuleStatement* node, void* data) { print("MODULE"); diff --git a/zetasql/parser/unparser.h b/zetasql/parser/unparser.h index 1202eac86..cf370f81b 100644 --- a/zetasql/parser/unparser.h +++ b/zetasql/parser/unparser.h @@ -242,6 +242,7 @@ class Unparser : public ParseTreeVisitor { void visitASTImportStatement(const ASTImportStatement* node, void* data) override; void visitASTLoadDataStatement(const ASTLoadDataStatement* node, void* data) override; + void visitASTSelectIntoStatement(const ASTSelectIntoStatement* node, void* data) override; void visitASTModuleStatement(const ASTModuleStatement* node, void* data) override; void visitASTWithClause(const ASTWithClause* node, void* data) override;