Skip to content

Commit

Permalink
move setting of recursive ctes to slt file and add test to ensure mul…
Browse files Browse the repository at this point in the history
…tiple record batches are produced each iteration
  • Loading branch information
matthewgapp committed Jan 15, 2024
1 parent 2fc2ad2 commit f67763a
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 6 deletions.
6 changes: 0 additions & 6 deletions datafusion/sqllogictest/src/test_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,6 @@ impl TestContext {
// hardcode target partitions so plans are deterministic
.with_target_partitions(4);

// enable recursive CTEs only when running cte.slt
config.options_mut().execution.enable_recursive_ctes = relative_path
.file_name()
.map(|s| s == "cte.slt")
.unwrap_or(false);

let mut test_ctx = TestContext::new(SessionContext::new_with_config(config));

let file_name = relative_path.file_name().unwrap().to_str().unwrap();
Expand Down
48 changes: 48 additions & 0 deletions datafusion/sqllogictest/test_files/cte.slt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ select * from (WITH source AS (select 1 as e) SELECT * FROM source) t1, (WITH
----
1 1

# enable recursive CTEs
statement ok
set datafusion.execution.enable_recursive_ctes = true;

# trivial recursive CTE works
query I rowsort
WITH RECURSIVE nodes AS (
Expand Down Expand Up @@ -81,7 +85,48 @@ CREATE EXTERNAL TABLE balance STORED as CSV WITH HEADER ROW LOCATION '../../test
statement ok
CREATE EXTERNAL TABLE growth STORED as CSV WITH HEADER ROW LOCATION '../../testing/data/csv/r_cte_growth.csv'

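# setup: use a small batch size so the recursive CTE queries below produce multiple record batches per iteration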
statement ok
set datafusion.execution.batch_size = 2;

# recursive CTE with static term derived from table works.
# use explain to ensure that batch size is set to 2. This should produce multiple batches per iteration since the input
# table 'balance' has 4 rows
query TT
EXPLAIN WITH RECURSIVE balances AS (
SELECT * from balance
UNION ALL
SELECT time + 1 as time, name, account_balance + 10 as account_balance
FROM balances
WHERE time < 10
)
SELECT * FROM balances
ORDER BY time, name, account_balance
----
logical_plan
Sort: balances.time ASC NULLS LAST, balances.name ASC NULLS LAST, balances.account_balance ASC NULLS LAST
--Projection: balances.time, balances.name, balances.account_balance
----SubqueryAlias: balances
------RecursiveQuery: is_distinct=false
--------Projection: balance.time, balance.name, balance.account_balance
----------TableScan: balance
--------Projection: balances.time + Int64(1) AS time, balances.name, balances.account_balance + Int64(10) AS account_balance
----------Filter: balances.time < Int64(10)
------------TableScan: balances
physical_plan
SortExec: expr=[time@0 ASC NULLS LAST,name@1 ASC NULLS LAST,account_balance@2 ASC NULLS LAST]
--RecursiveQueryExec: is_distinct=false
----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/r_cte_balance.csv]]}, projection=[time, name, account_balance], has_header=true
----CoalescePartitionsExec
------ProjectionExec: expr=[time@0 + 1 as time, name@1 as name, account_balance@2 + 10 as account_balance]
--------CoalesceBatchesExec: target_batch_size=2
----------FilterExec: time@0 < 10
------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
--------------WorkTableExec: name=balances

# recursive CTE with static term derived from table works
# note that this is run with batch size set to 2. This should produce multiple batches per iteration since the input
# table 'balance' has 4 rows
query ITI
WITH RECURSIVE balances AS (
SELECT * from balance
Expand Down Expand Up @@ -132,6 +177,9 @@ ORDER BY time, name, account_balance
10 Tim 290
10 Tim 480

# reset batch size to the DataFusion default (8192) so that the remaining
# tests in this file are not affected by the small batch size used above
statement ok
set datafusion.execution.batch_size = 8192;

# recursive CTE with recursive join works
query ITI
Expand Down

0 comments on commit f67763a

Please sign in to comment.